diff --git a/.gitattributes b/.gitattributes index f8bfaba2369d1b33e1ecb6e29023bda93e166aa1..7b55085428a6cb5f5b982ba16f50ab4391a29616 100644 --- a/.gitattributes +++ b/.gitattributes @@ -339,3 +339,6 @@ my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/opencv_pyth my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/opencv_python.libs/libxkbcommon-71ae2972.so.0.0.0 filter=lfs diff=lfs merge=lfs -text my_container_sandbox/workspace/anaconda3/lib/libnvvm.so.4.0.0 filter=lfs diff=lfs merge=lfs -text my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/sklearn/_isotonic.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/matplotlib/ft2font.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/torchvision.libs/libnvjpeg.90286a3c.so.11 filter=lfs diff=lfs merge=lfs -text +my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/kiwisolver/_cext.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/kiwisolver/_cext.cpython-38-x86_64-linux-gnu.so b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/kiwisolver/_cext.cpython-38-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..0402e753f8fd64ad86cc19275d49619bb6cbcc59 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/kiwisolver/_cext.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:847be32e761c90dedc0a587a45e0b9b1bc1d9c6a9322e3e278bad141fb8ebe90 +size 4260107 diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/matplotlib/ft2font.cpython-38-x86_64-linux-gnu.so b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/matplotlib/ft2font.cpython-38-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..6045bdc720cc7f76c2385a98e9e0172d000a1c74 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/matplotlib/ft2font.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03e5cffa72fb1aaef4acef0341ad5f6bb19501fcb384aaa7e7321e570813ac51 +size 4120999 diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/data/__pycache__/dataloader.cpython-38.pyc b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/data/__pycache__/dataloader.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9e1b9509c5b90a7dbf295a3244982df99b122b38 Binary files /dev/null and b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/data/__pycache__/dataloader.cpython-38.pyc differ diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/data/__pycache__/png_saver.cpython-38.pyc b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/data/__pycache__/png_saver.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1e83cdbfefa404574f42da5cdc25b76a54d817cd Binary files /dev/null and b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/data/__pycache__/png_saver.cpython-38.pyc differ diff --git 
a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/data/__pycache__/png_writer.cpython-38.pyc b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/data/__pycache__/png_writer.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ce9bd6845fa9db3271d6ed1fe6d321458b60e608 Binary files /dev/null and b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/data/__pycache__/png_writer.cpython-38.pyc differ diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/__pycache__/utils.cpython-38.pyc b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/__pycache__/utils.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0321835762512cb47ed0f39d5a17658d802c6f70 Binary files /dev/null and b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/__pycache__/utils.cpython-38.pyc differ diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/blocks/__pycache__/acti_norm.cpython-38.pyc b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/blocks/__pycache__/acti_norm.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..87eae283716b745c617f5a4383fe012f3841b0b8 Binary files /dev/null and b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/blocks/__pycache__/acti_norm.cpython-38.pyc differ diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/blocks/__pycache__/activation.cpython-38.pyc b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/blocks/__pycache__/activation.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2d0c6be3644ec4b37d256124fa0a8ac550f59087 Binary files /dev/null and b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/blocks/__pycache__/activation.cpython-38.pyc differ diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/blocks/__pycache__/convolutions.cpython-38.pyc b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/blocks/__pycache__/convolutions.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a66af22c8acfa4538dee3fb1d807fbf85dc13087 Binary files /dev/null and b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/blocks/__pycache__/convolutions.cpython-38.pyc differ diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/blocks/__pycache__/dints_block.cpython-38.pyc b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/blocks/__pycache__/dints_block.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4beb4f36860ead2b0e3547cb588863c97b7c6311 Binary files /dev/null and b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/blocks/__pycache__/dints_block.cpython-38.pyc differ diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/blocks/__pycache__/downsample.cpython-38.pyc b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/blocks/__pycache__/downsample.cpython-38.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..67f1d236a85b44bb6e0ba0a36f35ddc36831b33b Binary files /dev/null and b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/blocks/__pycache__/downsample.cpython-38.pyc differ diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/blocks/__pycache__/dynunet_block.cpython-38.pyc b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/blocks/__pycache__/dynunet_block.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a523dfdd6d4a8da792304471c823aab75049ce37 Binary files /dev/null and b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/blocks/__pycache__/dynunet_block.cpython-38.pyc differ diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/blocks/__pycache__/localnet_block.cpython-38.pyc b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/blocks/__pycache__/localnet_block.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..58d4d7ae5aab45c92720cca371c90af74e168d4b Binary files /dev/null and b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/blocks/__pycache__/localnet_block.cpython-38.pyc differ diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/blocks/__pycache__/mlp.cpython-38.pyc b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/blocks/__pycache__/mlp.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d6007222a759d61ccd536a3e51458a2aad23d28b Binary files /dev/null and b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/blocks/__pycache__/mlp.cpython-38.pyc differ diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/blocks/__pycache__/regunet_block.cpython-38.pyc b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/blocks/__pycache__/regunet_block.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ad7e367c5b8b2cd71029bea746a255dd6a5efc93 Binary files /dev/null and b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/blocks/__pycache__/regunet_block.cpython-38.pyc differ diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/blocks/__pycache__/selfattention.cpython-38.pyc b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/blocks/__pycache__/selfattention.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9993abf17e1d3b53778320100f747d1bc1dff641 Binary files /dev/null and b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/blocks/__pycache__/selfattention.cpython-38.pyc differ diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/blocks/__pycache__/squeeze_and_excitation.cpython-38.pyc b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/blocks/__pycache__/squeeze_and_excitation.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..dcf70642c12dcae085acadf520552dbd1ec0891d Binary files /dev/null and 
b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/blocks/__pycache__/squeeze_and_excitation.cpython-38.pyc differ diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/blocks/__pycache__/transformerblock.cpython-38.pyc b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/blocks/__pycache__/transformerblock.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..95a700f7c358653601383610e56f62cdbbc6568d Binary files /dev/null and b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/blocks/__pycache__/transformerblock.cpython-38.pyc differ diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/blocks/__pycache__/warp.cpython-38.pyc b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/blocks/__pycache__/warp.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b4fae101cf833aab65a5939020e8d8b26aa22fee Binary files /dev/null and b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/blocks/__pycache__/warp.cpython-38.pyc differ diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/layers/__init__.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/layers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..f122dccee6646565a9b244ebce15412ecbc12326 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/layers/__init__.py @@ -0,0 +1,31 @@ +# Copyright (c) MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from .convutils import calculate_out_shape, gaussian_1d, polyval, same_padding, stride_minus_kernel_padding +from .drop_path import DropPath +from .factories import Act, Conv, Dropout, LayerFactory, Norm, Pad, Pool, split_args +from .filtering import BilateralFilter, PHLFilter +from .gmm import GaussianMixtureModel +from .simplelayers import ( + LLTM, + ChannelPad, + Flatten, + GaussianFilter, + HilbertTransform, + Reshape, + SavitzkyGolayFilter, + SkipConnection, + apply_filter, + separable_filtering, +) +from .spatial_transforms import AffineTransform, grid_count, grid_grad, grid_pull, grid_push +from .utils import get_act_layer, get_dropout_layer, get_norm_layer, get_pool_layer +from .weight_init import _no_grad_trunc_normal_, trunc_normal_ diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/layers/__pycache__/utils.cpython-38.pyc b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/layers/__pycache__/utils.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..537c15a6010c4e40c1b215ec56ac1e75ef977117 Binary files /dev/null and b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/layers/__pycache__/utils.cpython-38.pyc differ diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/layers/convutils.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/layers/convutils.py new file mode 100644 index 0000000000000000000000000000000000000000..1e9ce954e8d6eaa5398f0eed2283db239dd204a8 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/layers/convutils.py @@ -0,0 +1,227 @@ +# Copyright (c) MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import List, Optional, Sequence, Tuple, Union + +import numpy as np +import torch + +__all__ = ["same_padding", "stride_minus_kernel_padding", "calculate_out_shape", "gaussian_1d", "polyval"] + + +def same_padding( + kernel_size: Union[Sequence[int], int], dilation: Union[Sequence[int], int] = 1 +) -> Union[Tuple[int, ...], int]: + """ + Return the padding value needed to ensure a convolution using the given kernel size produces an output of the same + shape as the input for a stride of 1, otherwise ensure a shape of the input divided by the stride rounded down. + + Raises: + NotImplementedError: When ``np.any((kernel_size - 1) * dilation % 2 == 1)``. + + """ + + kernel_size_np = np.atleast_1d(kernel_size) + dilation_np = np.atleast_1d(dilation) + + if np.any((kernel_size_np - 1) * dilation % 2 == 1): + raise NotImplementedError( + f"Same padding not available for kernel_size={kernel_size_np} and dilation={dilation_np}." 
+ ) + + padding_np = (kernel_size_np - 1) / 2 * dilation_np + padding = tuple(int(p) for p in padding_np) + + return padding if len(padding) > 1 else padding[0] + + +def stride_minus_kernel_padding( + kernel_size: Union[Sequence[int], int], stride: Union[Sequence[int], int] +) -> Union[Tuple[int, ...], int]: + kernel_size_np = np.atleast_1d(kernel_size) + stride_np = np.atleast_1d(stride) + + out_padding_np = stride_np - kernel_size_np + out_padding = tuple(int(p) for p in out_padding_np) + + return out_padding if len(out_padding) > 1 else out_padding[0] + + +def calculate_out_shape( + in_shape: Union[Sequence[int], int, np.ndarray], + kernel_size: Union[Sequence[int], int], + stride: Union[Sequence[int], int], + padding: Union[Sequence[int], int], +) -> Union[Tuple[int, ...], int]: + """ + Calculate the output tensor shape when applying a convolution to a tensor of shape `inShape` with kernel size + `kernel_size`, stride value `stride`, and input padding value `padding`. All arguments can be scalars or multiple + values, return value is a scalar if all inputs are scalars. + """ + in_shape_np = np.atleast_1d(in_shape) + kernel_size_np = np.atleast_1d(kernel_size) + stride_np = np.atleast_1d(stride) + padding_np = np.atleast_1d(padding) + + out_shape_np = ((in_shape_np - kernel_size_np + padding_np + padding_np) // stride_np) + 1 + out_shape = tuple(int(s) for s in out_shape_np) + + return out_shape if len(out_shape) > 1 else out_shape[0] + + +def gaussian_1d( + sigma: torch.Tensor, truncated: float = 4.0, approx: str = "erf", normalize: bool = False +) -> torch.Tensor: + """ + one dimensional Gaussian kernel. + + Args: + sigma: std of the kernel + truncated: tail length + approx: discrete Gaussian kernel type, available options are "erf", "sampled", and "scalespace". + + - ``erf`` approximation interpolates the error function; + - ``sampled`` uses a sampled Gaussian kernel; + - ``scalespace`` corresponds to + https://en.wikipedia.org/wiki/Scale_space_implementation#The_discrete_Gaussian_kernel + based on the modified Bessel functions. + + normalize: whether to normalize the kernel with `kernel.sum()`. + + Raises: + ValueError: When ``truncated`` is non-positive. 
+ + Returns: + 1D torch tensor + + """ + sigma = torch.as_tensor(sigma, dtype=torch.float, device=sigma.device if isinstance(sigma, torch.Tensor) else None) + device = sigma.device + if truncated <= 0.0: + raise ValueError(f"truncated must be positive, got {truncated}.") + tail = int(max(float(sigma) * truncated, 0.5) + 0.5) + if approx.lower() == "erf": + x = torch.arange(-tail, tail + 1, dtype=torch.float, device=device) + t = 0.70710678 / torch.abs(sigma) + out = 0.5 * ((t * (x + 0.5)).erf() - (t * (x - 0.5)).erf()) + out = out.clamp(min=0) + elif approx.lower() == "sampled": + x = torch.arange(-tail, tail + 1, dtype=torch.float, device=sigma.device) + out = torch.exp(-0.5 / (sigma * sigma) * x**2) + if not normalize: # compute the normalizer + out = out / (2.5066282 * sigma) + elif approx.lower() == "scalespace": + sigma2 = sigma * sigma + out_pos: List[Optional[torch.Tensor]] = [None] * (tail + 1) + out_pos[0] = _modified_bessel_0(sigma2) + out_pos[1] = _modified_bessel_1(sigma2) + for k in range(2, len(out_pos)): + out_pos[k] = _modified_bessel_i(k, sigma2) + out = out_pos[:0:-1] + out.extend(out_pos) + out = torch.stack(out) * torch.exp(-sigma2) + else: + raise NotImplementedError(f"Unsupported option: approx='{approx}'.") + return out / out.sum() if normalize else out # type: ignore + + +def polyval(coef, x) -> torch.Tensor: + """ + Evaluates the polynomial defined by `coef` at `x`. + + For a 1D sequence of coef (length n), evaluate:: + + y = coef[n-1] + x * (coef[n-2] + ... + x * (coef[1] + x * coef[0])) + + Args: + coef: a sequence of floats representing the coefficients of the polynomial + x: float or a sequence of floats representing the variable of the polynomial + + Returns: + 1D torch tensor + """ + device = x.device if isinstance(x, torch.Tensor) else None + coef = torch.as_tensor(coef, dtype=torch.float, device=device) + if coef.ndim == 0 or (len(coef) < 1): + return torch.zeros(x.shape) + x = torch.as_tensor(x, dtype=torch.float, device=device) + ans = coef[0] + for c in coef[1:]: + ans = ans * x + c + return ans # type: ignore + + +def _modified_bessel_0(x: torch.Tensor) -> torch.Tensor: + x = torch.as_tensor(x, dtype=torch.float, device=x.device if isinstance(x, torch.Tensor) else None) + if torch.abs(x) < 3.75: + y = x * x / 14.0625 + return polyval([0.45813e-2, 0.360768e-1, 0.2659732, 1.2067492, 3.0899424, 3.5156229, 1.0], y) + ax = torch.abs(x) + y = 3.75 / ax + _coef = [ + 0.392377e-2, + -0.1647633e-1, + 0.2635537e-1, + -0.2057706e-1, + 0.916281e-2, + -0.157565e-2, + 0.225319e-2, + 0.1328592e-1, + 0.39894228, + ] + return polyval(_coef, y) * torch.exp(ax) / torch.sqrt(ax) + + +def _modified_bessel_1(x: torch.Tensor) -> torch.Tensor: + x = torch.as_tensor(x, dtype=torch.float, device=x.device if isinstance(x, torch.Tensor) else None) + if torch.abs(x) < 3.75: + y = x * x / 14.0625 + _coef = [0.32411e-3, 0.301532e-2, 0.2658733e-1, 0.15084934, 0.51498869, 0.87890594, 0.5] + return torch.abs(x) * polyval(_coef, y) + ax = torch.abs(x) + y = 3.75 / ax + _coef = [ + -0.420059e-2, + 0.1787654e-1, + -0.2895312e-1, + 0.2282967e-1, + -0.1031555e-1, + 0.163801e-2, + -0.362018e-2, + -0.3988024e-1, + 0.39894228, + ] + ans = polyval(_coef, y) * torch.exp(ax) / torch.sqrt(ax) + return -ans if x < 0.0 else ans + + +def _modified_bessel_i(n: int, x: torch.Tensor) -> torch.Tensor: + if n < 2: + raise ValueError(f"n must be greater than 1, got n={n}.") + x = torch.as_tensor(x, dtype=torch.float, device=x.device if isinstance(x, torch.Tensor) else None) + if x == 0.0: + return x + 
device = x.device + tox = 2.0 / torch.abs(x) + ans, bip, bi = torch.tensor(0.0, device=device), torch.tensor(0.0, device=device), torch.tensor(1.0, device=device) + m = int(2 * (n + np.floor(np.sqrt(40.0 * n)))) + for j in range(m, 0, -1): + bim = bip + float(j) * tox * bi + bip = bi + bi = bim + if abs(bi) > 1.0e10: + ans = ans * 1.0e-10 + bi = bi * 1.0e-10 + bip = bip * 1.0e-10 + if j == n: + ans = bip + ans = ans * _modified_bessel_0(x) / bi + return -ans if x < 0.0 and (n % 2) == 1 else ans diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/layers/drop_path.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/layers/drop_path.py new file mode 100644 index 0000000000000000000000000000000000000000..7bb209ed25f2b8e82e41a61174b933f93cc2e647 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/layers/drop_path.py @@ -0,0 +1,45 @@ +# Copyright (c) MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch.nn as nn + + +class DropPath(nn.Module): + """Stochastic drop paths per sample for residual blocks. + Based on: + https://github.com/rwightman/pytorch-image-models + """ + + def __init__(self, drop_prob: float = 0.0, scale_by_keep: bool = True) -> None: + """ + Args: + drop_prob: drop path probability. + scale_by_keep: scaling by non-dropped probability. + """ + super().__init__() + self.drop_prob = drop_prob + self.scale_by_keep = scale_by_keep + + if not (0 <= drop_prob <= 1): + raise ValueError("Drop path prob should be between 0 and 1.") + + def drop_path(self, x, drop_prob: float = 0.0, training: bool = False, scale_by_keep: bool = True): + if drop_prob == 0.0 or not training: + return x + keep_prob = 1 - drop_prob + shape = (x.shape[0],) + (1,) * (x.ndim - 1) + random_tensor = x.new_empty(shape).bernoulli_(keep_prob) + if keep_prob > 0.0 and scale_by_keep: + random_tensor.div_(keep_prob) + return x * random_tensor + + def forward(self, x): + return self.drop_path(x, self.drop_prob, self.training, self.scale_by_keep) diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/layers/factories.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/layers/factories.py new file mode 100644 index 0000000000000000000000000000000000000000..89fe1912a5121a99770c28abb62064ad345d63fb --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/layers/factories.py @@ -0,0 +1,371 @@ +# Copyright (c) MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Defines factories for creating layers in generic, extensible, and dimensionally independent ways. A separate factory +object is created for each type of layer, and factory functions keyed to names are added to these objects. Whenever +a layer is requested the factory name and any necessary arguments are passed to the factory object. The return value +is typically a type but can be any callable producing a layer object. + +The factory objects contain functions keyed to names converted to upper case, these names can be referred to as members +of the factory so that they can function as constant identifiers. eg. instance normalization is named `Norm.INSTANCE`. + +For example, to get a transpose convolution layer the name is needed and then a dimension argument is provided which is +passed to the factory function: + +.. code-block:: python + + dimension = 3 + name = Conv.CONVTRANS + conv = Conv[name, dimension] + +This allows the `dimension` value to be set in the constructor, for example so that the dimensionality of a network is +parameterizable. Not all factories require arguments after the name, the caller must be aware which are required. + +Defining new factories involves creating the object then associating it with factory functions: + +.. code-block:: python + + fact = LayerFactory() + + @fact.factory_function('test') + def make_something(x, y): + # do something with x and y to choose which layer type to return + return SomeLayerType + ... + + # request object from factory TEST with 1 and 2 as values for x and y + layer = fact[fact.TEST, 1, 2] + +Typically the caller of a factory would know what arguments to pass (ie. the dimensionality of the requested type) but +can be parameterized with the factory name and the arguments to pass to the created type at instantiation time: + +.. code-block:: python + + def use_factory(fact_args): + fact_name, type_args = split_args + layer_type = fact[fact_name, 1, 2] + return layer_type(**type_args) + ... + + kw_args = {'arg0':0, 'arg1':True} + layer = use_factory( (fact.TEST, kwargs) ) +""" + +import warnings +from typing import Any, Callable, Dict, Tuple, Type, Union + +import torch +import torch.nn as nn + +from monai.utils import look_up_option, optional_import + +InstanceNorm3dNVFuser, has_nvfuser = optional_import("apex.normalization", name="InstanceNorm3dNVFuser") + + +__all__ = ["LayerFactory", "Dropout", "Norm", "Act", "Conv", "Pool", "Pad", "split_args"] + + +class LayerFactory: + """ + Factory object for creating layers, this uses given factory functions to actually produce the types or constructing + callables. These functions are referred to by name and can be added at any time. + """ + + def __init__(self) -> None: + self.factories: Dict[str, Callable] = {} + + @property + def names(self) -> Tuple[str, ...]: + """ + Produces all factory names. + """ + + return tuple(self.factories) + + def add_factory_callable(self, name: str, func: Callable) -> None: + """ + Add the factory function to this object under the given name. 
+ """ + + self.factories[name.upper()] = func + self.__doc__ = ( + "The supported member" + + ("s are: " if len(self.names) > 1 else " is: ") + + ", ".join(f"``{name}``" for name in self.names) + + ".\nPlease see :py:class:`monai.networks.layers.split_args` for additional args parsing." + ) + + def factory_function(self, name: str) -> Callable: + """ + Decorator for adding a factory function with the given name. + """ + + def _add(func: Callable) -> Callable: + self.add_factory_callable(name, func) + return func + + return _add + + def get_constructor(self, factory_name: str, *args) -> Any: + """ + Get the constructor for the given factory name and arguments. + + Raises: + TypeError: When ``factory_name`` is not a ``str``. + + """ + + if not isinstance(factory_name, str): + raise TypeError(f"factory_name must a str but is {type(factory_name).__name__}.") + + func = look_up_option(factory_name.upper(), self.factories) + return func(*args) + + def __getitem__(self, args) -> Any: + """ + Get the given name or name/arguments pair. If `args` is a callable it is assumed to be the constructor + itself and is returned, otherwise it should be the factory name or a pair containing the name and arguments. + """ + + # `args[0]` is actually a type or constructor + if callable(args): + return args + + # `args` is a factory name or a name with arguments + if isinstance(args, str): + name_obj, args = args, () + else: + name_obj, *args = args + + return self.get_constructor(name_obj, *args) + + def __getattr__(self, key): + """ + If `key` is a factory name, return it, otherwise behave as inherited. This allows referring to factory names + as if they were constants, eg. `Fact.FOO` for a factory Fact with factory function foo. + """ + + if key in self.factories: + return key + + return super().__getattribute__(key) + + +def split_args(args): + """ + Split arguments in a way to be suitable for using with the factory types. If `args` is a string it's interpreted as + the type name. + + Args: + args (str or a tuple of object name and kwarg dict): input arguments to be parsed. + + Raises: + TypeError: When ``args`` type is not in ``Union[str, Tuple[Union[str, Callable], dict]]``. 
+ + Examples:: + + >>> act_type, args = split_args("PRELU") + >>> monai.networks.layers.Act[act_type] + + + >>> act_type, args = split_args(("PRELU", {"num_parameters": 1, "init": 0.25})) + >>> monai.networks.layers.Act[act_type](**args) + PReLU(num_parameters=1) + + """ + + if isinstance(args, str): + return args, {} + name_obj, name_args = args + + if not isinstance(name_obj, (str, Callable)) or not isinstance(name_args, dict): + msg = "Layer specifiers must be single strings or pairs of the form (name/object-types, argument dict)" + raise TypeError(msg) + + return name_obj, name_args + + +# Define factories for these layer types + +Dropout = LayerFactory() +Norm = LayerFactory() +Act = LayerFactory() +Conv = LayerFactory() +Pool = LayerFactory() +Pad = LayerFactory() + + +@Dropout.factory_function("dropout") +def dropout_factory(dim: int) -> Type[Union[nn.Dropout, nn.Dropout2d, nn.Dropout3d]]: + types = (nn.Dropout, nn.Dropout2d, nn.Dropout3d) + return types[dim - 1] + + +@Dropout.factory_function("alphadropout") +def alpha_dropout_factory(_dim): + return nn.AlphaDropout + + +@Norm.factory_function("instance") +def instance_factory(dim: int) -> Type[Union[nn.InstanceNorm1d, nn.InstanceNorm2d, nn.InstanceNorm3d]]: + types = (nn.InstanceNorm1d, nn.InstanceNorm2d, nn.InstanceNorm3d) + return types[dim - 1] + + +@Norm.factory_function("batch") +def batch_factory(dim: int) -> Type[Union[nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d]]: + types = (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d) + return types[dim - 1] + + +@Norm.factory_function("group") +def group_factory(_dim) -> Type[nn.GroupNorm]: + return nn.GroupNorm + + +@Norm.factory_function("layer") +def layer_factory(_dim) -> Type[nn.LayerNorm]: + return nn.LayerNorm + + +@Norm.factory_function("localresponse") +def local_response_factory(_dim) -> Type[nn.LocalResponseNorm]: + return nn.LocalResponseNorm + + +@Norm.factory_function("syncbatch") +def sync_batch_factory(_dim) -> Type[nn.SyncBatchNorm]: + return nn.SyncBatchNorm + + +@Norm.factory_function("instance_nvfuser") +def instance_nvfuser_factory(dim): + """ + `InstanceNorm3dNVFuser` is a faster verison of InstanceNorm layer and implemented in `apex`. + It only supports 3d tensors as the input. It also requires to use with CUDA and non-Windows OS. + In this function, if the required library `apex.normalization.InstanceNorm3dNVFuser` does not exist, + `nn.InstanceNorm3d` will be returned instead. + This layer is based on a customized autograd function, which is not supported in TorchScript currently. + Please switch to use `nn.InstanceNorm3d` if TorchScript is necessary. + + Please check the following link for more details about how to install `apex`: + https://github.com/NVIDIA/apex#installation + + """ + types = (nn.InstanceNorm1d, nn.InstanceNorm2d) + if dim != 3: + warnings.warn(f"`InstanceNorm3dNVFuser` only supports 3d cases, use {types[dim - 1]} instead.") + return types[dim - 1] + # test InstanceNorm3dNVFuser installation with a basic example + has_nvfuser_flag = has_nvfuser + if not torch.cuda.is_available(): + return nn.InstanceNorm3d + try: + layer = InstanceNorm3dNVFuser(num_features=1, affine=True).to("cuda:0") + inp = torch.randn([1, 1, 1, 1, 1]).to("cuda:0") + out = layer(inp) + del inp, out, layer + except Exception: + has_nvfuser_flag = False + if not has_nvfuser_flag: + warnings.warn( + "`apex.normalization.InstanceNorm3dNVFuser` is not installed properly, use nn.InstanceNorm3d instead." 
+ ) + return nn.InstanceNorm3d + return InstanceNorm3dNVFuser + + +Act.add_factory_callable("elu", lambda: nn.modules.ELU) +Act.add_factory_callable("relu", lambda: nn.modules.ReLU) +Act.add_factory_callable("leakyrelu", lambda: nn.modules.LeakyReLU) +Act.add_factory_callable("prelu", lambda: nn.modules.PReLU) +Act.add_factory_callable("relu6", lambda: nn.modules.ReLU6) +Act.add_factory_callable("selu", lambda: nn.modules.SELU) +Act.add_factory_callable("celu", lambda: nn.modules.CELU) +Act.add_factory_callable("gelu", lambda: nn.modules.GELU) +Act.add_factory_callable("sigmoid", lambda: nn.modules.Sigmoid) +Act.add_factory_callable("tanh", lambda: nn.modules.Tanh) +Act.add_factory_callable("softmax", lambda: nn.modules.Softmax) +Act.add_factory_callable("logsoftmax", lambda: nn.modules.LogSoftmax) + + +@Act.factory_function("swish") +def swish_factory(): + from monai.networks.blocks.activation import Swish + + return Swish + + +@Act.factory_function("memswish") +def memswish_factory(): + from monai.networks.blocks.activation import MemoryEfficientSwish + + return MemoryEfficientSwish + + +@Act.factory_function("mish") +def mish_factory(): + from monai.networks.blocks.activation import Mish + + return Mish + + +@Conv.factory_function("conv") +def conv_factory(dim: int) -> Type[Union[nn.Conv1d, nn.Conv2d, nn.Conv3d]]: + types = (nn.Conv1d, nn.Conv2d, nn.Conv3d) + return types[dim - 1] + + +@Conv.factory_function("convtrans") +def convtrans_factory(dim: int) -> Type[Union[nn.ConvTranspose1d, nn.ConvTranspose2d, nn.ConvTranspose3d]]: + types = (nn.ConvTranspose1d, nn.ConvTranspose2d, nn.ConvTranspose3d) + return types[dim - 1] + + +@Pool.factory_function("max") +def maxpooling_factory(dim: int) -> Type[Union[nn.MaxPool1d, nn.MaxPool2d, nn.MaxPool3d]]: + types = (nn.MaxPool1d, nn.MaxPool2d, nn.MaxPool3d) + return types[dim - 1] + + +@Pool.factory_function("adaptivemax") +def adaptive_maxpooling_factory( + dim: int, +) -> Type[Union[nn.AdaptiveMaxPool1d, nn.AdaptiveMaxPool2d, nn.AdaptiveMaxPool3d]]: + types = (nn.AdaptiveMaxPool1d, nn.AdaptiveMaxPool2d, nn.AdaptiveMaxPool3d) + return types[dim - 1] + + +@Pool.factory_function("avg") +def avgpooling_factory(dim: int) -> Type[Union[nn.AvgPool1d, nn.AvgPool2d, nn.AvgPool3d]]: + types = (nn.AvgPool1d, nn.AvgPool2d, nn.AvgPool3d) + return types[dim - 1] + + +@Pool.factory_function("adaptiveavg") +def adaptive_avgpooling_factory( + dim: int, +) -> Type[Union[nn.AdaptiveAvgPool1d, nn.AdaptiveAvgPool2d, nn.AdaptiveAvgPool3d]]: + types = (nn.AdaptiveAvgPool1d, nn.AdaptiveAvgPool2d, nn.AdaptiveAvgPool3d) + return types[dim - 1] + + +@Pad.factory_function("replicationpad") +def replication_pad_factory(dim: int) -> Type[Union[nn.ReplicationPad1d, nn.ReplicationPad2d, nn.ReplicationPad3d]]: + types = (nn.ReplicationPad1d, nn.ReplicationPad2d, nn.ReplicationPad3d) + return types[dim - 1] + + +@Pad.factory_function("constantpad") +def constant_pad_factory(dim: int) -> Type[Union[nn.ConstantPad1d, nn.ConstantPad2d, nn.ConstantPad3d]]: + types = (nn.ConstantPad1d, nn.ConstantPad2d, nn.ConstantPad3d) + return types[dim - 1] diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/layers/filtering.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/layers/filtering.py new file mode 100644 index 0000000000000000000000000000000000000000..bbf925eba91d72e9f6b0136558c3156978247aaa --- /dev/null +++ 
b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/layers/filtering.py @@ -0,0 +1,101 @@ +# Copyright (c) MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch + +from monai.utils.module import optional_import + +_C, _ = optional_import("monai._C") + +__all__ = ["BilateralFilter", "PHLFilter"] + + +class BilateralFilter(torch.autograd.Function): + """ + Blurs the input tensor spatially whilst preserving edges. Can run on 1D, 2D, or 3D, + tensors (on top of Batch and Channel dimensions). Two implementations are provided, + an exact solution and a much faster approximation which uses a permutohedral lattice. + + See: + https://en.wikipedia.org/wiki/Bilateral_filter + https://graphics.stanford.edu/papers/permutohedral/ + + Args: + input: input tensor. + + spatial sigma: the standard deviation of the spatial blur. Higher values can + hurt performance when not using the approximate method (see fast approx). + + color sigma: the standard deviation of the color blur. Lower values preserve + edges better whilst higher values tend to a simple gaussian spatial blur. + + fast approx: This flag chooses between two implementations. The approximate method may + produce artifacts in some scenarios whereas the exact solution may be intolerably + slow for high spatial standard deviations. + + Returns: + output (torch.Tensor): output tensor. + """ + + @staticmethod + def forward(ctx, input, spatial_sigma=5, color_sigma=0.5, fast_approx=True): + ctx.ss = spatial_sigma + ctx.cs = color_sigma + ctx.fa = fast_approx + output_data = _C.bilateral_filter(input, spatial_sigma, color_sigma, fast_approx) + return output_data + + @staticmethod + def backward(ctx, grad_output): + spatial_sigma, color_sigma, fast_approx = ctx.ss, ctx.cs, ctx.fa + grad_input = _C.bilateral_filter(grad_output, spatial_sigma, color_sigma, fast_approx) + return grad_input, None, None, None + + +class PHLFilter(torch.autograd.Function): + """ + Filters input based on arbitrary feature vectors. Uses a permutohedral + lattice data structure to efficiently approximate n-dimensional gaussian + filtering. Complexity is broadly independent of kernel size. Most applicable + to higher filter dimensions and larger kernel sizes. + + See: + https://graphics.stanford.edu/papers/permutohedral/ + + Args: + input: input tensor to be filtered. + + features: feature tensor used to filter the input. + + sigmas: the standard deviations of each feature in the filter. + + Returns: + output (torch.Tensor): output tensor. + """ + + @staticmethod + def forward(ctx, input, features, sigmas=None): + + scaled_features = features + if sigmas is not None: + for i in range(features.size(1)): + scaled_features[:, i, ...] 
/= sigmas[i] + + ctx.save_for_backward(scaled_features) + output_data = _C.phl_filter(input, scaled_features) + return output_data + + @staticmethod + def backward(ctx, grad_output): + raise NotImplementedError("PHLFilter does not currently support Backpropagation") + # scaled_features, = ctx.saved_variables + # grad_input = _C.phl_filter(grad_output, scaled_features) + # return grad_input diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/layers/gmm.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/layers/gmm.py new file mode 100644 index 0000000000000000000000000000000000000000..eb9a3f91e40f72be77b640369f79196f12bbed59 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/layers/gmm.py @@ -0,0 +1,85 @@ +# Copyright (c) MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch + +from monai._extensions.loader import load_module + +__all__ = ["GaussianMixtureModel"] + + +class GaussianMixtureModel: + """ + Takes an initial labeling and uses a mixture of Gaussians to approximate each classes + distribution in the feature space. Each unlabeled element is then assigned a probability + of belonging to each class based on it's fit to each classes approximated distribution. + + See: + https://en.wikipedia.org/wiki/Mixture_model + """ + + def __init__(self, channel_count: int, mixture_count: int, mixture_size: int, verbose_build: bool = False): + """ + Args: + channel_count: The number of features per element. + mixture_count: The number of class distributions. + mixture_size: The number Gaussian components per class distribution. + verbose_build: If ``True``, turns on verbose logging of load steps. + """ + if not torch.cuda.is_available(): + raise NotImplementedError("GaussianMixtureModel is currently implemented for CUDA.") + self.channel_count = channel_count + self.mixture_count = mixture_count + self.mixture_size = mixture_size + self.compiled_extension = load_module( + "gmm", + {"CHANNEL_COUNT": channel_count, "MIXTURE_COUNT": mixture_count, "MIXTURE_SIZE": mixture_size}, + verbose_build=verbose_build, + ) + self.params, self.scratch = self.compiled_extension.init() + + def reset(self): + """ + Resets the parameters of the model. + """ + self.params, self.scratch = self.compiled_extension.init() + + def learn(self, features, labels): + """ + Learns, from scratch, the distribution of each class from the provided labels. + + Args: + features (torch.Tensor): features for each element. + labels (torch.Tensor): initial labeling for each element. + """ + self.compiled_extension.learn(self.params, self.scratch, features, labels) + + def apply(self, features): + """ + Applies the current model to a set of feature vectors. + + Args: + features (torch.Tensor): feature vectors for each element. + + Returns: + output (torch.Tensor): class assignment probabilities for each element. 
+ """ + return _ApplyFunc.apply(self.params, features, self.compiled_extension) + + +class _ApplyFunc(torch.autograd.Function): + @staticmethod + def forward(ctx, params, features, compiled_extension): + return compiled_extension.apply(params, features) + + @staticmethod + def backward(ctx, grad_output): + raise NotImplementedError("GMM does not support backpropagation") diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/layers/simplelayers.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/layers/simplelayers.py new file mode 100644 index 0000000000000000000000000000000000000000..3de4e75766d78972310f6eea5b6e799cf88a8389 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/layers/simplelayers.py @@ -0,0 +1,532 @@ +# Copyright (c) MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math +from copy import deepcopy +from typing import List, Sequence, Union + +import torch +import torch.nn.functional as F +from torch import nn +from torch.autograd import Function + +from monai.networks.layers.convutils import gaussian_1d +from monai.networks.layers.factories import Conv +from monai.utils import ChannelMatching, SkipMode, look_up_option, optional_import, pytorch_after +from monai.utils.misc import issequenceiterable + +_C, _ = optional_import("monai._C") +fft, _ = optional_import("torch.fft") + +__all__ = [ + "ChannelPad", + "Flatten", + "GaussianFilter", + "HilbertTransform", + "LLTM", + "Reshape", + "SavitzkyGolayFilter", + "SkipConnection", + "apply_filter", + "separable_filtering", +] + + +class ChannelPad(nn.Module): + """ + Expand the input tensor's channel dimension from length `in_channels` to `out_channels`, + by padding or a projection. + """ + + def __init__( + self, + spatial_dims: int, + in_channels: int, + out_channels: int, + mode: Union[ChannelMatching, str] = ChannelMatching.PAD, + ): + """ + + Args: + spatial_dims: number of spatial dimensions of the input image. + in_channels: number of input channels. + out_channels: number of output channels. + mode: {``"pad"``, ``"project"``} + Specifies handling residual branch and conv branch channel mismatches. Defaults to ``"pad"``. + + - ``"pad"``: with zero padding. + - ``"project"``: with a trainable conv with kernel size one. 
+ """ + super().__init__() + self.project = None + self.pad = None + if in_channels == out_channels: + return + mode = look_up_option(mode, ChannelMatching) + if mode == ChannelMatching.PROJECT: + conv_type = Conv[Conv.CONV, spatial_dims] + self.project = conv_type(in_channels, out_channels, kernel_size=1) + return + if mode == ChannelMatching.PAD: + if in_channels > out_channels: + raise ValueError('Incompatible values: channel_matching="pad" and in_channels > out_channels.') + pad_1 = (out_channels - in_channels) // 2 + pad_2 = out_channels - in_channels - pad_1 + pad = [0, 0] * spatial_dims + [pad_1, pad_2] + [0, 0] + self.pad = tuple(pad) + return + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if self.project is not None: + return torch.as_tensor(self.project(x)) # as_tensor used to get around mypy typing bug + if self.pad is not None: + return F.pad(x, self.pad) + return x + + +class SkipConnection(nn.Module): + """ + Combine the forward pass input with the result from the given submodule:: + + --+--submodule--o-- + |_____________| + + The available modes are ``"cat"``, ``"add"``, ``"mul"``. + """ + + def __init__(self, submodule, dim: int = 1, mode: Union[str, SkipMode] = "cat") -> None: + """ + + Args: + submodule: the module defines the trainable branch. + dim: the dimension over which the tensors are concatenated. + Used when mode is ``"cat"``. + mode: ``"cat"``, ``"add"``, ``"mul"``. defaults to ``"cat"``. + """ + super().__init__() + self.submodule = submodule + self.dim = dim + self.mode = look_up_option(mode, SkipMode).value + + def forward(self, x: torch.Tensor) -> torch.Tensor: + y = self.submodule(x) + + if self.mode == "cat": + return torch.cat([x, y], dim=self.dim) + if self.mode == "add": + return torch.add(x, y) + if self.mode == "mul": + return torch.mul(x, y) + raise NotImplementedError(f"Unsupported mode {self.mode}.") + + +class Flatten(nn.Module): + """ + Flattens the given input in the forward pass to be [B,-1] in shape. + """ + + def forward(self, x: torch.Tensor) -> torch.Tensor: + return x.view(x.size(0), -1) + + +class Reshape(nn.Module): + """ + Reshapes input tensors to the given shape (minus batch dimension), retaining original batch size. + """ + + def __init__(self, *shape: int) -> None: + """ + Given a shape list/tuple `shape` of integers (s0, s1, ... , sn), this layer will reshape input tensors of + shape (batch, s0 * s1 * ... * sn) to shape (batch, s0, s1, ... , sn). 
+ + Args: + shape: list/tuple of integer shape dimensions + """ + super().__init__() + self.shape = (1,) + tuple(shape) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + shape = list(self.shape) + shape[0] = x.shape[0] # done this way for Torchscript + return x.reshape(shape) + + +def _separable_filtering_conv( + input_: torch.Tensor, + kernels: List[torch.Tensor], + pad_mode: str, + d: int, + spatial_dims: int, + paddings: List[int], + num_channels: int, +) -> torch.Tensor: + + if d < 0: + return input_ + + s = [1] * len(input_.shape) + s[d + 2] = -1 + _kernel = kernels[d].reshape(s) + + # if filter kernel is unity, don't convolve + if _kernel.numel() == 1 and _kernel[0] == 1: + return _separable_filtering_conv(input_, kernels, pad_mode, d - 1, spatial_dims, paddings, num_channels) + + _kernel = _kernel.repeat([num_channels, 1] + [1] * spatial_dims) + _padding = [0] * spatial_dims + _padding[d] = paddings[d] + conv_type = [F.conv1d, F.conv2d, F.conv3d][spatial_dims - 1] + + # translate padding for input to torch.nn.functional.pad + _reversed_padding_repeated_twice: List[List[int]] = [[p, p] for p in reversed(_padding)] + _sum_reversed_padding_repeated_twice: List[int] = sum(_reversed_padding_repeated_twice, []) + padded_input = F.pad(input_, _sum_reversed_padding_repeated_twice, mode=pad_mode) + + return conv_type( + input=_separable_filtering_conv(padded_input, kernels, pad_mode, d - 1, spatial_dims, paddings, num_channels), + weight=_kernel, + groups=num_channels, + ) + + +def separable_filtering(x: torch.Tensor, kernels: List[torch.Tensor], mode: str = "zeros") -> torch.Tensor: + """ + Apply 1-D convolutions along each spatial dimension of `x`. + + Args: + x: the input image. must have shape (batch, channels, H[, W, ...]). + kernels: kernel along each spatial dimension. + could be a single kernel (duplicated for all spatial dimensions), or + a list of `spatial_dims` number of kernels. + mode (string, optional): padding mode passed to convolution class. ``'zeros'``, ``'reflect'``, ``'replicate'`` + or ``'circular'``. Default: ``'zeros'``. See ``torch.nn.Conv1d()`` for more information. + + Raises: + TypeError: When ``x`` is not a ``torch.Tensor``. + + Examples: + + .. code-block:: python + + >>> import torch + >>> from monai.networks.layers import separable_filtering + >>> img = torch.randn(2, 4, 32, 32) # batch_size 2, channels 4, 32x32 2D images + # applying a [-1, 0, 1] filter along each of the spatial dimensions. + # the output shape is the same as the input shape. + >>> out = separable_filtering(img, torch.tensor((-1., 0., 1.))) + # applying `[-1, 0, 1]`, `[1, 0, -1]` filters along two spatial dimensions respectively. + # the output shape is the same as the input shape. + >>> out = separable_filtering(img, [torch.tensor((-1., 0., 1.)), torch.tensor((1., 0., -1.))]) + + """ + + if not isinstance(x, torch.Tensor): + raise TypeError(f"x must be a torch.Tensor but is {type(x).__name__}.") + + spatial_dims = len(x.shape) - 2 + if isinstance(kernels, torch.Tensor): + kernels = [kernels] * spatial_dims + _kernels = [s.to(x) for s in kernels] + _paddings = [(k.shape[0] - 1) // 2 for k in _kernels] + n_chs = x.shape[1] + pad_mode = "constant" if mode == "zeros" else mode + + return _separable_filtering_conv(x, _kernels, pad_mode, spatial_dims - 1, spatial_dims, _paddings, n_chs) + + +def apply_filter(x: torch.Tensor, kernel: torch.Tensor, **kwargs) -> torch.Tensor: + """ + Filtering `x` with `kernel` independently for each batch and channel respectively. 
+ + Args: + x: the input image, must have shape (batch, channels, H[, W, D]). + kernel: `kernel` must at least have the spatial shape (H_k[, W_k, D_k]). + `kernel` shape must be broadcastable to the `batch` and `channels` dimensions of `x`. + kwargs: keyword arguments passed to `conv*d()` functions. + + Returns: + The filtered `x`. + + Examples: + + .. code-block:: python + + >>> import torch + >>> from monai.networks.layers import apply_filter + >>> img = torch.rand(2, 5, 10, 10) # batch_size 2, channels 5, 10x10 2D images + >>> out = apply_filter(img, torch.rand(3, 3)) # spatial kernel + >>> out = apply_filter(img, torch.rand(5, 3, 3)) # channel-wise kernels + >>> out = apply_filter(img, torch.rand(2, 5, 3, 3)) # batch-, channel-wise kernels + + """ + if not isinstance(x, torch.Tensor): + raise TypeError(f"x must be a torch.Tensor but is {type(x).__name__}.") + batch, chns, *spatials = x.shape + n_spatial = len(spatials) + if n_spatial > 3: + raise NotImplementedError(f"Only spatial dimensions up to 3 are supported but got {n_spatial}.") + k_size = len(kernel.shape) + if k_size < n_spatial or k_size > n_spatial + 2: + raise ValueError( + f"kernel must have {n_spatial} ~ {n_spatial + 2} dimensions to match the input shape {x.shape}." + ) + kernel = kernel.to(x) + # broadcast kernel size to (batch chns, spatial_kernel_size) + kernel = kernel.expand(batch, chns, *kernel.shape[(k_size - n_spatial) :]) + kernel = kernel.reshape(-1, 1, *kernel.shape[2:]) # group=1 + x = x.view(1, kernel.shape[0], *spatials) + conv = [F.conv1d, F.conv2d, F.conv3d][n_spatial - 1] + if "padding" not in kwargs: + if pytorch_after(1, 10): + kwargs["padding"] = "same" + else: + # even-sized kernels are not supported + kwargs["padding"] = [(k - 1) // 2 for k in kernel.shape[2:]] + + if "stride" not in kwargs: + kwargs["stride"] = 1 + output = conv(x, kernel, groups=kernel.shape[0], bias=None, **kwargs) + return output.view(batch, chns, *output.shape[2:]) + + +class SavitzkyGolayFilter(nn.Module): + """ + Convolve a Tensor along a particular axis with a Savitzky-Golay kernel. + + Args: + window_length: Length of the filter window, must be a positive odd integer. + order: Order of the polynomial to fit to each window, must be less than ``window_length``. + axis (optional): Axis along which to apply the filter kernel. Default 2 (first spatial dimension). + mode (string, optional): padding mode passed to convolution class. ``'zeros'``, ``'reflect'``, ``'replicate'`` or + ``'circular'``. Default: ``'zeros'``. See torch.nn.Conv1d() for more information. + """ + + def __init__(self, window_length: int, order: int, axis: int = 2, mode: str = "zeros"): + + super().__init__() + if order >= window_length: + raise ValueError("order must be less than window_length.") + + self.axis = axis + self.mode = mode + self.coeffs = self._make_coeffs(window_length, order) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + """ + Args: + x: Tensor or array-like to filter. Must be real, in shape ``[Batch, chns, spatial1, spatial2, ...]`` and + have a device type of ``'cpu'``. + Returns: + torch.Tensor: ``x`` filtered by Savitzky-Golay kernel with window length ``self.window_length`` using + polynomials of order ``self.order``, along axis specified in ``self.axis``. 
+ """ + + # Make input a real tensor on the CPU + x = torch.as_tensor(x, device=x.device if isinstance(x, torch.Tensor) else None) + if torch.is_complex(x): + raise ValueError("x must be real.") + x = x.to(dtype=torch.float) + + if (self.axis < 0) or (self.axis > len(x.shape) - 1): + raise ValueError(f"Invalid axis for shape of x, got axis {self.axis} and shape {x.shape}.") + + # Create list of filter kernels (1 per spatial dimension). The kernel for self.axis will be the savgol coeffs, + # while the other kernels will be set to [1]. + n_spatial_dims = len(x.shape) - 2 + spatial_processing_axis = self.axis - 2 + new_dims_before = spatial_processing_axis + new_dims_after = n_spatial_dims - spatial_processing_axis - 1 + kernel_list = [self.coeffs.to(device=x.device, dtype=x.dtype)] + for _ in range(new_dims_before): + kernel_list.insert(0, torch.ones(1, device=x.device, dtype=x.dtype)) + for _ in range(new_dims_after): + kernel_list.append(torch.ones(1, device=x.device, dtype=x.dtype)) + + return separable_filtering(x, kernel_list, mode=self.mode) + + @staticmethod + def _make_coeffs(window_length, order): + + half_length, rem = divmod(window_length, 2) + if rem == 0: + raise ValueError("window_length must be odd.") + + idx = torch.arange(window_length - half_length - 1, -half_length - 1, -1, dtype=torch.float, device="cpu") + a = idx ** torch.arange(order + 1, dtype=torch.float, device="cpu").reshape(-1, 1) + y = torch.zeros(order + 1, dtype=torch.float, device="cpu") + y[0] = 1.0 + return torch.lstsq(y, a).solution.squeeze() + + +class HilbertTransform(nn.Module): + """ + Determine the analytical signal of a Tensor along a particular axis. + + Args: + axis: Axis along which to apply Hilbert transform. Default 2 (first spatial dimension). + n: Number of Fourier components (i.e. FFT size). Default: ``x.shape[axis]``. + """ + + def __init__(self, axis: int = 2, n: Union[int, None] = None) -> None: + + super().__init__() + self.axis = axis + self.n = n + + def forward(self, x: torch.Tensor) -> torch.Tensor: + """ + Args: + x: Tensor or array-like to transform. Must be real and in shape ``[Batch, chns, spatial1, spatial2, ...]``. + Returns: + torch.Tensor: Analytical signal of ``x``, transformed along axis specified in ``self.axis`` using + FFT of size ``self.N``. The absolute value of ``x_ht`` relates to the envelope of ``x`` along axis ``self.axis``. 
+ """ + + # Make input a real tensor + x = torch.as_tensor(x, device=x.device if isinstance(x, torch.Tensor) else None) + if torch.is_complex(x): + raise ValueError("x must be real.") + x = x.to(dtype=torch.float) + + if (self.axis < 0) or (self.axis > len(x.shape) - 1): + raise ValueError(f"Invalid axis for shape of x, got axis {self.axis} and shape {x.shape}.") + + n = x.shape[self.axis] if self.n is None else self.n + if n <= 0: + raise ValueError("N must be positive.") + x = torch.as_tensor(x, dtype=torch.complex64) + # Create frequency axis + f = torch.cat( + [ + torch.true_divide(torch.arange(0, (n - 1) // 2 + 1, device=x.device), float(n)), + torch.true_divide(torch.arange(-(n // 2), 0, device=x.device), float(n)), + ] + ) + xf = fft.fft(x, n=n, dim=self.axis) + # Create step function + u = torch.heaviside(f, torch.tensor([0.5], device=f.device)) + u = torch.as_tensor(u, dtype=x.dtype, device=u.device) + new_dims_before = self.axis + new_dims_after = len(xf.shape) - self.axis - 1 + for _ in range(new_dims_before): + u.unsqueeze_(0) + for _ in range(new_dims_after): + u.unsqueeze_(-1) + + ht = fft.ifft(xf * 2 * u, dim=self.axis) + + # Apply transform + return torch.as_tensor(ht, device=ht.device, dtype=ht.dtype) + + +class GaussianFilter(nn.Module): + def __init__( + self, + spatial_dims: int, + sigma: Union[Sequence[float], float, Sequence[torch.Tensor], torch.Tensor], + truncated: float = 4.0, + approx: str = "erf", + requires_grad: bool = False, + ) -> None: + """ + Args: + spatial_dims: number of spatial dimensions of the input image. + must have shape (Batch, channels, H[, W, ...]). + sigma: std. could be a single value, or `spatial_dims` number of values. + truncated: spreads how many stds. + approx: discrete Gaussian kernel type, available options are "erf", "sampled", and "scalespace". + + - ``erf`` approximation interpolates the error function; + - ``sampled`` uses a sampled Gaussian kernel; + - ``scalespace`` corresponds to + https://en.wikipedia.org/wiki/Scale_space_implementation#The_discrete_Gaussian_kernel + based on the modified Bessel functions. + + requires_grad: whether to store the gradients for sigma. + if True, `sigma` will be the initial value of the parameters of this module + (for example `parameters()` iterator could be used to get the parameters); + otherwise this module will fix the kernels using `sigma` as the std. + """ + if issequenceiterable(sigma): + if len(sigma) != spatial_dims: # type: ignore + raise ValueError + else: + sigma = [deepcopy(sigma) for _ in range(spatial_dims)] # type: ignore + super().__init__() + self.sigma = [ + torch.nn.Parameter( + torch.as_tensor(s, dtype=torch.float, device=s.device if isinstance(s, torch.Tensor) else None), + requires_grad=requires_grad, + ) + for s in sigma # type: ignore + ] + self.truncated = truncated + self.approx = approx + for idx, param in enumerate(self.sigma): + self.register_parameter(f"kernel_sigma_{idx}", param) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + """ + Args: + x: in shape [Batch, chns, H, W, D]. 
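Example showing isotropic smoothing of a volume (a minimal sketch; it assumes ``GaussianFilter`` is re-exported from ``monai.networks.layers``):

.. code-block:: python

    import torch
    from monai.networks.layers import GaussianFilter  # import path assumed

    img = torch.rand(1, 1, 32, 32, 32)                # (batch, channels, H, W, D)
    blur = GaussianFilter(spatial_dims=3, sigma=2.0)  # one sigma shared across the three axes
    out = blur(img)                                   # smoothed volume, same shape as the input
    # with requires_grad=True, the sigmas become trainable parameters of the module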
+ """ + _kernel = [gaussian_1d(s, truncated=self.truncated, approx=self.approx) for s in self.sigma] + return separable_filtering(x=x, kernels=_kernel) + + +class LLTMFunction(Function): + @staticmethod + def forward(ctx, input, weights, bias, old_h, old_cell): + outputs = _C.lltm_forward(input, weights, bias, old_h, old_cell) + new_h, new_cell = outputs[:2] + variables = outputs[1:] + [weights] + ctx.save_for_backward(*variables) + + return new_h, new_cell + + @staticmethod + def backward(ctx, grad_h, grad_cell): + outputs = _C.lltm_backward(grad_h.contiguous(), grad_cell.contiguous(), *ctx.saved_tensors) + d_old_h, d_input, d_weights, d_bias, d_old_cell = outputs[:5] + + return d_input, d_weights, d_bias, d_old_h, d_old_cell + + +class LLTM(nn.Module): + """ + This recurrent unit is similar to an LSTM, but differs in that it lacks a forget + gate and uses an Exponential Linear Unit (ELU) as its internal activation function. + Because this unit never forgets, call it LLTM, or Long-Long-Term-Memory unit. + It has both C++ and CUDA implementation, automatically switch according to the + target device where put this module to. + + Args: + input_features: size of input feature data + state_size: size of the state of recurrent unit + + Referring to: https://pytorch.org/tutorials/advanced/cpp_extension.html + """ + + def __init__(self, input_features: int, state_size: int): + super().__init__() + self.input_features = input_features + self.state_size = state_size + self.weights = nn.Parameter(torch.empty(3 * state_size, input_features + state_size)) + self.bias = nn.Parameter(torch.empty(1, 3 * state_size)) + self.reset_parameters() + + def reset_parameters(self): + stdv = 1.0 / math.sqrt(self.state_size) + for weight in self.parameters(): + weight.data.uniform_(-stdv, +stdv) + + def forward(self, input, state): + return LLTMFunction.apply(input, self.weights, self.bias, *state) diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/layers/spatial_transforms.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/layers/spatial_transforms.py new file mode 100644 index 0000000000000000000000000000000000000000..07ddb3ce9d9b96d1bce4f6a5acfa4efc79e0521f --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/layers/spatial_transforms.py @@ -0,0 +1,564 @@ +# Copyright (c) MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
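Referring back to the ``LLTM`` unit defined above, a minimal usage sketch (the import path is assumed, and the compiled ``monai._C`` extension must be available on the target device):

.. code-block:: python

    import torch
    from monai.networks.layers import LLTM  # import path assumed; requires the compiled `monai._C` extension

    rnn = LLTM(input_features=32, state_size=128)
    x = torch.randn(16, 32)        # a batch of 16 input feature vectors
    h = torch.zeros(16, 128)       # initial hidden state
    c = torch.zeros(16, 128)       # initial cell state
    new_h, new_c = rnn(x, (h, c))  # one recurrent step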
+ +from typing import Optional, Sequence, Union + +import torch +import torch.nn as nn + +from monai.networks import to_norm_affine +from monai.utils import GridSampleMode, GridSamplePadMode, ensure_tuple, look_up_option, optional_import + +_C, _ = optional_import("monai._C") + +__all__ = ["AffineTransform", "grid_pull", "grid_push", "grid_count", "grid_grad"] + + +class _GridPull(torch.autograd.Function): + @staticmethod + def forward(ctx, input, grid, interpolation, bound, extrapolate): + opt = (bound, interpolation, extrapolate) + output = _C.grid_pull(input, grid, *opt) + if input.requires_grad or grid.requires_grad: + ctx.opt = opt + ctx.save_for_backward(input, grid) + + return output + + @staticmethod + def backward(ctx, grad): + if not (ctx.needs_input_grad[0] or ctx.needs_input_grad[1]): + return None, None, None, None, None + var = ctx.saved_tensors + opt = ctx.opt + grads = _C.grid_pull_backward(grad, *var, *opt) + if ctx.needs_input_grad[0]: + return grads[0], grads[1] if ctx.needs_input_grad[1] else None, None, None, None + if ctx.needs_input_grad[1]: + return None, grads[0], None, None, None + + +def grid_pull( + input: torch.Tensor, grid: torch.Tensor, interpolation="linear", bound="zero", extrapolate: bool = True +) -> torch.Tensor: + """ + Sample an image with respect to a deformation field. + + `interpolation` can be an int, a string or an InterpolationType. + Possible values are:: + + - 0 or 'nearest' or InterpolationType.nearest + - 1 or 'linear' or InterpolationType.linear + - 2 or 'quadratic' or InterpolationType.quadratic + - 3 or 'cubic' or InterpolationType.cubic + - 4 or 'fourth' or InterpolationType.fourth + - 5 or 'fifth' or InterpolationType.fifth + - 6 or 'sixth' or InterpolationType.sixth + - 7 or 'seventh' or InterpolationType.seventh + + A list of values can be provided, in the order [W, H, D], + to specify dimension-specific interpolation orders. + + `bound` can be an int, a string or a BoundType. + Possible values are:: + + - 0 or 'replicate' or 'nearest' or BoundType.replicate or 'border' + - 1 or 'dct1' or 'mirror' or BoundType.dct1 + - 2 or 'dct2' or 'reflect' or BoundType.dct2 + - 3 or 'dst1' or 'antimirror' or BoundType.dst1 + - 4 or 'dst2' or 'antireflect' or BoundType.dst2 + - 5 or 'dft' or 'wrap' or BoundType.dft + - 7 or 'zero' or 'zeros' or BoundType.zero + + A list of values can be provided, in the order [W, H, D], + to specify dimension-specific boundary conditions. + `sliding` is a specific condition than only applies to flow fields + (with as many channels as dimensions). It cannot be dimension-specific. + Note that: + + - `dft` corresponds to circular padding + - `dct2` corresponds to Neumann boundary conditions (symmetric) + - `dst2` corresponds to Dirichlet boundary conditions (antisymmetric) + + See Also: + - https://en.wikipedia.org/wiki/Discrete_cosine_transform + - https://en.wikipedia.org/wiki/Discrete_sine_transform + - ``help(monai._C.BoundType)`` + - ``help(monai._C.InterpolationType)`` + + Args: + input: Input image. `(B, C, Wi, Hi, Di)`. + grid: Deformation field. `(B, Wo, Ho, Do, 1|2|3)`. + interpolation (int or list[int] , optional): Interpolation order. + Defaults to `'linear'`. + bound (BoundType, or list[BoundType], optional): Boundary conditions. + Defaults to `'zero'`. + extrapolate: Extrapolate out-of-bound data. + Defaults to `True`. + + Returns: + output (torch.Tensor): Deformed image `(B, C, Wo, Ho, Do)`. 
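Example showing the call signature documented above (a minimal sketch; it assumes ``grid_pull`` is re-exported from ``monai.networks.layers``, that the compiled ``monai._C`` extension is available, and that the deformation field holds voxel coordinates):

.. code-block:: python

    import torch
    from monai.networks.layers import grid_pull  # import path assumed

    img = torch.rand(1, 1, 32, 32, 32)  # (B, C, W, H, D)
    # an identity sampling grid of voxel coordinates, shape (B, Wo, Ho, Do, 3)
    axes = [torch.arange(32, dtype=torch.float32)] * 3
    grid = torch.stack(torch.meshgrid(*axes), dim=-1).unsqueeze(0)
    warped = grid_pull(img, grid, interpolation="linear", bound="zero", extrapolate=True)
    # with the identity grid, `warped` reproduces `img` up to interpolation error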
+ + """ + # Convert parameters + bound = [_C.BoundType.__members__[b] if isinstance(b, str) else _C.BoundType(b) for b in ensure_tuple(bound)] + interpolation = [ + _C.InterpolationType.__members__[i] if isinstance(i, str) else _C.InterpolationType(i) + for i in ensure_tuple(interpolation) + ] + out: torch.Tensor + out = _GridPull.apply(input, grid, interpolation, bound, extrapolate) + return out + + +class _GridPush(torch.autograd.Function): + @staticmethod + def forward(ctx, input, grid, shape, interpolation, bound, extrapolate): + opt = (bound, interpolation, extrapolate) + output = _C.grid_push(input, grid, shape, *opt) + if input.requires_grad or grid.requires_grad: + ctx.opt = opt + ctx.save_for_backward(input, grid) + + return output + + @staticmethod + def backward(ctx, grad): + if not (ctx.needs_input_grad[0] or ctx.needs_input_grad[1]): + return None, None, None, None, None, None + var = ctx.saved_tensors + opt = ctx.opt + grads = _C.grid_push_backward(grad, *var, *opt) + if ctx.needs_input_grad[0]: + return grads[0], grads[1] if ctx.needs_input_grad[1] else None, None, None, None, None + if ctx.needs_input_grad[1]: + return None, grads[0], None, None, None, None + + +def grid_push( + input: torch.Tensor, grid: torch.Tensor, shape=None, interpolation="linear", bound="zero", extrapolate: bool = True +): + """ + Splat an image with respect to a deformation field (pull adjoint). + + `interpolation` can be an int, a string or an InterpolationType. + Possible values are:: + + - 0 or 'nearest' or InterpolationType.nearest + - 1 or 'linear' or InterpolationType.linear + - 2 or 'quadratic' or InterpolationType.quadratic + - 3 or 'cubic' or InterpolationType.cubic + - 4 or 'fourth' or InterpolationType.fourth + - 5 or 'fifth' or InterpolationType.fifth + - 6 or 'sixth' or InterpolationType.sixth + - 7 or 'seventh' or InterpolationType.seventh + + A list of values can be provided, in the order `[W, H, D]`, + to specify dimension-specific interpolation orders. + + `bound` can be an int, a string or a BoundType. + Possible values are:: + + - 0 or 'replicate' or 'nearest' or BoundType.replicate + - 1 or 'dct1' or 'mirror' or BoundType.dct1 + - 2 or 'dct2' or 'reflect' or BoundType.dct2 + - 3 or 'dst1' or 'antimirror' or BoundType.dst1 + - 4 or 'dst2' or 'antireflect' or BoundType.dst2 + - 5 or 'dft' or 'wrap' or BoundType.dft + - 7 or 'zero' or BoundType.zero + + A list of values can be provided, in the order `[W, H, D]`, + to specify dimension-specific boundary conditions. + `sliding` is a specific condition than only applies to flow fields + (with as many channels as dimensions). It cannot be dimension-specific. + Note that: + + - `dft` corresponds to circular padding + - `dct2` corresponds to Neumann boundary conditions (symmetric) + - `dst2` corresponds to Dirichlet boundary conditions (antisymmetric) + + See Also: + + - https://en.wikipedia.org/wiki/Discrete_cosine_transform + - https://en.wikipedia.org/wiki/Discrete_sine_transform + - ``help(monai._C.BoundType)`` + - ``help(monai._C.InterpolationType)`` + + Args: + input: Input image `(B, C, Wi, Hi, Di)`. + grid: Deformation field `(B, Wi, Hi, Di, 1|2|3)`. + shape: Shape of the source image. + interpolation (int or list[int] , optional): Interpolation order. + Defaults to `'linear'`. + bound (BoundType, or list[BoundType], optional): Boundary conditions. + Defaults to `'zero'`. + extrapolate: Extrapolate out-of-bound data. + Defaults to `True`. + + Returns: + output (torch.Tensor): Splatted image `(B, C, Wo, Ho, Do)`. 
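Example of splatting onto a larger target volume (a minimal sketch under the same assumptions as the ``grid_pull`` example above):

.. code-block:: python

    import torch
    from monai.networks.layers import grid_push  # import path assumed

    img = torch.rand(1, 1, 32, 32, 32)
    axes = [torch.arange(32, dtype=torch.float32)] * 3
    grid = torch.stack(torch.meshgrid(*axes), dim=-1).unsqueeze(0)
    # splat intensities into a 64^3 target volume; `shape` defaults to the input shape when omitted
    splatted = grid_push(img, grid, shape=(64, 64, 64), interpolation="linear", bound="zero")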
+ + """ + # Convert parameters + bound = [_C.BoundType.__members__[b] if isinstance(b, str) else _C.BoundType(b) for b in ensure_tuple(bound)] + interpolation = [ + _C.InterpolationType.__members__[i] if isinstance(i, str) else _C.InterpolationType(i) + for i in ensure_tuple(interpolation) + ] + + if shape is None: + shape = tuple(input.shape[2:]) + + return _GridPush.apply(input, grid, shape, interpolation, bound, extrapolate) + + +class _GridCount(torch.autograd.Function): + @staticmethod + def forward(ctx, grid, shape, interpolation, bound, extrapolate): + opt = (bound, interpolation, extrapolate) + output = _C.grid_count(grid, shape, *opt) + if grid.requires_grad: + ctx.opt = opt + ctx.save_for_backward(grid) + + return output + + @staticmethod + def backward(ctx, grad): + if ctx.needs_input_grad[0]: + var = ctx.saved_tensors + opt = ctx.opt + return _C.grid_count_backward(grad, *var, *opt), None, None, None, None + return None, None, None, None, None + + +def grid_count(grid: torch.Tensor, shape=None, interpolation="linear", bound="zero", extrapolate: bool = True): + """ + Splatting weights with respect to a deformation field (pull adjoint). + + This function is equivalent to applying grid_push to an image of ones. + + `interpolation` can be an int, a string or an InterpolationType. + Possible values are:: + + - 0 or 'nearest' or InterpolationType.nearest + - 1 or 'linear' or InterpolationType.linear + - 2 or 'quadratic' or InterpolationType.quadratic + - 3 or 'cubic' or InterpolationType.cubic + - 4 or 'fourth' or InterpolationType.fourth + - 5 or 'fifth' or InterpolationType.fifth + - 6 or 'sixth' or InterpolationType.sixth + - 7 or 'seventh' or InterpolationType.seventh + + A list of values can be provided, in the order [W, H, D], + to specify dimension-specific interpolation orders. + + `bound` can be an int, a string or a BoundType. + Possible values are:: + + - 0 or 'replicate' or 'nearest' or BoundType.replicate + - 1 or 'dct1' or 'mirror' or BoundType.dct1 + - 2 or 'dct2' or 'reflect' or BoundType.dct2 + - 3 or 'dst1' or 'antimirror' or BoundType.dst1 + - 4 or 'dst2' or 'antireflect' or BoundType.dst2 + - 5 or 'dft' or 'wrap' or BoundType.dft + - 7 or 'zero' or BoundType.zero + + A list of values can be provided, in the order [W, H, D], + to specify dimension-specific boundary conditions. + `sliding` is a specific condition than only applies to flow fields + (with as many channels as dimensions). It cannot be dimension-specific. + Note that: + + - `dft` corresponds to circular padding + - `dct2` corresponds to Neumann boundary conditions (symmetric) + - `dst2` corresponds to Dirichlet boundary conditions (antisymmetric) + + See Also: + + - https://en.wikipedia.org/wiki/Discrete_cosine_transform + - https://en.wikipedia.org/wiki/Discrete_sine_transform + - ``help(monai._C.BoundType)`` + - ``help(monai._C.InterpolationType)`` + + Args: + grid: Deformation field `(B, Wi, Hi, Di, 2|3)`. + shape: shape of the source image. + interpolation (int or list[int] , optional): Interpolation order. + Defaults to `'linear'`. + bound (BoundType, or list[BoundType], optional): Boundary conditions. + Defaults to `'zero'`. + extrapolate (bool, optional): Extrapolate out-of-bound data. + Defaults to `True`. + + Returns: + output (torch.Tensor): Splat weights `(B, 1, Wo, Ho, Do)`. 
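Example of weight-normalized splatting (a minimal sketch under the same assumptions as above):

.. code-block:: python

    import torch
    from monai.networks.layers import grid_count, grid_push  # import paths assumed

    img = torch.rand(1, 1, 32, 32, 32)
    axes = [torch.arange(32, dtype=torch.float32)] * 3
    grid = torch.stack(torch.meshgrid(*axes), dim=-1).unsqueeze(0)
    pushed = grid_push(img, grid)                   # splatted intensities
    weights = grid_count(grid, shape=(32, 32, 32))  # (B, 1, 32, 32, 32) splat weights
    normalized = pushed / weights.clamp(min=1e-6)   # divide by the weights where they are non-zero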
+ + """ + # Convert parameters + bound = [_C.BoundType.__members__[b] if isinstance(b, str) else _C.BoundType(b) for b in ensure_tuple(bound)] + interpolation = [ + _C.InterpolationType.__members__[i] if isinstance(i, str) else _C.InterpolationType(i) + for i in ensure_tuple(interpolation) + ] + + if shape is None: + shape = tuple(grid.shape[2:]) + + return _GridCount.apply(grid, shape, interpolation, bound, extrapolate) + + +class _GridGrad(torch.autograd.Function): + @staticmethod + def forward(ctx, input, grid, interpolation, bound, extrapolate): + opt = (bound, interpolation, extrapolate) + output = _C.grid_grad(input, grid, *opt) + if input.requires_grad or grid.requires_grad: + ctx.opt = opt + ctx.save_for_backward(input, grid) + + return output + + @staticmethod + def backward(ctx, grad): + if not (ctx.needs_input_grad[0] or ctx.needs_input_grad[1]): + return None, None, None, None, None + var = ctx.saved_tensors + opt = ctx.opt + grads = _C.grid_grad_backward(grad, *var, *opt) + if ctx.needs_input_grad[0]: + return grads[0], grads[1] if ctx.needs_input_grad[1] else None, None, None, None + if ctx.needs_input_grad[1]: + return None, grads[0], None, None, None + + +def grid_grad(input: torch.Tensor, grid: torch.Tensor, interpolation="linear", bound="zero", extrapolate: bool = True): + """ + Sample an image with respect to a deformation field. + + `interpolation` can be an int, a string or an InterpolationType. + Possible values are:: + + - 0 or 'nearest' or InterpolationType.nearest + - 1 or 'linear' or InterpolationType.linear + - 2 or 'quadratic' or InterpolationType.quadratic + - 3 or 'cubic' or InterpolationType.cubic + - 4 or 'fourth' or InterpolationType.fourth + - 5 or 'fifth' or InterpolationType.fifth + - 6 or 'sixth' or InterpolationType.sixth + - 7 or 'seventh' or InterpolationType.seventh + + A list of values can be provided, in the order [W, H, D], + to specify dimension-specific interpolation orders. + + `bound` can be an int, a string or a BoundType. + Possible values are:: + + - 0 or 'replicate' or 'nearest' or BoundType.replicate + - 1 or 'dct1' or 'mirror' or BoundType.dct1 + - 2 or 'dct2' or 'reflect' or BoundType.dct2 + - 3 or 'dst1' or 'antimirror' or BoundType.dst1 + - 4 or 'dst2' or 'antireflect' or BoundType.dst2 + - 5 or 'dft' or 'wrap' or BoundType.dft + - 7 or 'zero' or BoundType.zero + + A list of values can be provided, in the order [W, H, D], + to specify dimension-specific boundary conditions. + `sliding` is a specific condition than only applies to flow fields + (with as many channels as dimensions). It cannot be dimension-specific. + Note that: + + - `dft` corresponds to circular padding + - `dct2` corresponds to Neumann boundary conditions (symmetric) + - `dst2` corresponds to Dirichlet boundary conditions (antisymmetric) + + See Also: + + - https://en.wikipedia.org/wiki/Discrete_cosine_transform + - https://en.wikipedia.org/wiki/Discrete_sine_transform + - ``help(monai._C.BoundType)`` + - ``help(monai._C.InterpolationType)`` + + + Args: + input: Input image. `(B, C, Wi, Hi, Di)`. + grid: Deformation field. `(B, Wo, Ho, Do, 2|3)`. + interpolation (int or list[int] , optional): Interpolation order. + Defaults to `'linear'`. + bound (BoundType, or list[BoundType], optional): Boundary conditions. + Defaults to `'zero'`. + extrapolate: Extrapolate out-of-bound data. Defaults to `True`. + + Returns: + output (torch.Tensor): Sampled gradients (B, C, Wo, Ho, Do, 1|2|3). 
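Example showing the call signature documented above (a minimal sketch under the same assumptions as above):

.. code-block:: python

    import torch
    from monai.networks.layers import grid_grad  # import path assumed

    img = torch.rand(1, 1, 32, 32, 32)
    axes = [torch.arange(32, dtype=torch.float32)] * 3
    grid = torch.stack(torch.meshgrid(*axes), dim=-1).unsqueeze(0)
    grads = grid_grad(img, grid, interpolation="linear", bound="zero")
    # `grads` carries one extra trailing dimension with the spatial derivatives at each sampled point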
+ + """ + # Convert parameters + bound = [_C.BoundType.__members__[b] if isinstance(b, str) else _C.BoundType(b) for b in ensure_tuple(bound)] + interpolation = [ + _C.InterpolationType.__members__[i] if isinstance(i, str) else _C.InterpolationType(i) + for i in ensure_tuple(interpolation) + ] + + return _GridGrad.apply(input, grid, interpolation, bound, extrapolate) + + +class AffineTransform(nn.Module): + def __init__( + self, + spatial_size: Optional[Union[Sequence[int], int]] = None, + normalized: bool = False, + mode: Union[GridSampleMode, str] = GridSampleMode.BILINEAR, + padding_mode: Union[GridSamplePadMode, str] = GridSamplePadMode.ZEROS, + align_corners: bool = False, + reverse_indexing: bool = True, + zero_centered: Optional[bool] = None, + ) -> None: + """ + Apply affine transformations with a batch of affine matrices. + + When `normalized=False` and `reverse_indexing=True`, + it does the commonly used resampling in the 'pull' direction + following the ``scipy.ndimage.affine_transform`` convention. + In this case `theta` is equivalent to (ndim+1, ndim+1) input ``matrix`` of ``scipy.ndimage.affine_transform``, + operates on homogeneous coordinates. + See also: https://docs.scipy.org/doc/scipy/reference/generated/scipy.ndimage.affine_transform.html + + When `normalized=True` and `reverse_indexing=False`, + it applies `theta` to the normalized coordinates (coords. in the range of [-1, 1]) directly. + This is often used with `align_corners=False` to achieve resolution-agnostic resampling, + thus useful as a part of trainable modules such as the spatial transformer networks. + See also: https://pytorch.org/tutorials/intermediate/spatial_transformer_tutorial.html + + Args: + spatial_size: output spatial shape, the full output shape will be + `[N, C, *spatial_size]` where N and C are inferred from the `src` input of `self.forward`. + normalized: indicating whether the provided affine matrix `theta` is defined + for the normalized coordinates. If `normalized=False`, `theta` will be converted + to operate on normalized coordinates as pytorch affine_grid works with the normalized + coordinates. + mode: {``"bilinear"``, ``"nearest"``} + Interpolation mode to calculate output values. Defaults to ``"bilinear"``. + See also: https://pytorch.org/docs/stable/generated/torch.nn.functional.grid_sample.html + padding_mode: {``"zeros"``, ``"border"``, ``"reflection"``} + Padding mode for outside grid values. Defaults to ``"zeros"``. + See also: https://pytorch.org/docs/stable/generated/torch.nn.functional.grid_sample.html + align_corners: see also https://pytorch.org/docs/stable/generated/torch.nn.functional.grid_sample.html. + reverse_indexing: whether to reverse the spatial indexing of image and coordinates. + set to `False` if `theta` follows pytorch's default "D, H, W" convention. + set to `True` if `theta` follows `scipy.ndimage` default "i, j, k" convention. + zero_centered: whether the affine is applied to coordinates in a zero-centered value range. + With `zero_centered=True`, for example, the center of rotation will be the + spatial center of the input; with `zero_centered=False`, the center of rotation will be the + origin of the input. This option is only available when `normalized=False`, + where the default behaviour is `False` if unspecified. + See also: :py:func:`monai.networks.utils.normalize_transform`. 
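Example of resampling a 2D image with a pixel-space affine (a minimal sketch; it assumes ``AffineTransform`` is re-exported from ``monai.networks.layers``):

.. code-block:: python

    import torch
    from monai.networks.layers import AffineTransform  # import path assumed

    img = torch.arange(24.0).reshape(1, 1, 4, 6)  # (N, C, H, W) 2D image
    # a 2x3 affine: identity rotation/scaling plus a one-pixel shift along the first
    # spatial axis, using the default scipy-style (pixel-coordinate) convention
    theta = torch.tensor([[[1.0, 0.0, 1.0],
                           [0.0, 1.0, 0.0]]])
    xform = AffineTransform(padding_mode="zeros")
    out = xform(img, theta)                            # same spatial size as `img`
    out_big = xform(img, theta, spatial_size=(8, 12))  # resample onto a larger output grid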
+ """ + super().__init__() + self.spatial_size = ensure_tuple(spatial_size) if spatial_size is not None else None + self.normalized = normalized + self.mode: GridSampleMode = look_up_option(mode, GridSampleMode) + self.padding_mode: GridSamplePadMode = look_up_option(padding_mode, GridSamplePadMode) + self.align_corners = align_corners + self.reverse_indexing = reverse_indexing + if zero_centered is not None and self.normalized: + raise ValueError("`normalized=True` is not compatible with the `zero_centered` option.") + self.zero_centered = zero_centered if zero_centered is not None else False + + def forward( + self, src: torch.Tensor, theta: torch.Tensor, spatial_size: Optional[Union[Sequence[int], int]] = None + ) -> torch.Tensor: + """ + ``theta`` must be an affine transformation matrix with shape + 3x3 or Nx3x3 or Nx2x3 or 2x3 for spatial 2D transforms, + 4x4 or Nx4x4 or Nx3x4 or 3x4 for spatial 3D transforms, + where `N` is the batch size. `theta` will be converted into float Tensor for the computation. + + Args: + src (array_like): image in spatial 2D or 3D (N, C, spatial_dims), + where N is the batch dim, C is the number of channels. + theta (array_like): Nx3x3, Nx2x3, 3x3, 2x3 for spatial 2D inputs, + Nx4x4, Nx3x4, 3x4, 4x4 for spatial 3D inputs. When the batch dimension is omitted, + `theta` will be repeated N times, N is the batch dim of `src`. + spatial_size: output spatial shape, the full output shape will be + `[N, C, *spatial_size]` where N and C are inferred from the `src`. + + Raises: + TypeError: When ``theta`` is not a ``torch.Tensor``. + ValueError: When ``theta`` is not one of [Nxdxd, dxd]. + ValueError: When ``theta`` is not one of [Nx3x3, Nx4x4]. + TypeError: When ``src`` is not a ``torch.Tensor``. + ValueError: When ``src`` spatially is not one of [2D, 3D]. + ValueError: When affine and image batch dimension differ. + + """ + # validate `theta` + if not isinstance(theta, torch.Tensor): + raise TypeError(f"theta must be torch.Tensor but is {type(theta).__name__}.") + if theta.dim() not in (2, 3): + raise ValueError(f"theta must be Nxdxd or dxd, got {theta.shape}.") + if theta.dim() == 2: + theta = theta[None] # adds a batch dim. 
+ theta = theta.clone() # no in-place change of theta + theta_shape = tuple(theta.shape[1:]) + if theta_shape in ((2, 3), (3, 4)): # needs padding to dxd + pad_affine = torch.tensor([0, 0, 1] if theta_shape[0] == 2 else [0, 0, 0, 1]) + pad_affine = pad_affine.repeat(theta.shape[0], 1, 1).to(theta) + pad_affine.requires_grad = False + theta = torch.cat([theta, pad_affine], dim=1) + if tuple(theta.shape[1:]) not in ((3, 3), (4, 4)): + raise ValueError(f"theta must be Nx3x3 or Nx4x4, got {theta.shape}.") + + # validate `src` + if not isinstance(src, torch.Tensor): + raise TypeError(f"src must be torch.Tensor but is {type(src).__name__}.") + sr = src.dim() - 2 # input spatial rank + if sr not in (2, 3): + raise ValueError(f"Unsupported src dimension: {sr}, available options are [2, 3].") + + # set output shape + src_size = tuple(src.shape) + dst_size = src_size # default to the src shape + if self.spatial_size is not None: + dst_size = src_size[:2] + self.spatial_size + if spatial_size is not None: + dst_size = src_size[:2] + ensure_tuple(spatial_size) + + # reverse and normalize theta if needed + if not self.normalized: + theta = to_norm_affine( + affine=theta, + src_size=src_size[2:], + dst_size=dst_size[2:], + align_corners=self.align_corners, + zero_centered=self.zero_centered, + ) + if self.reverse_indexing: + rev_idx = torch.as_tensor(range(sr - 1, -1, -1), device=src.device) + theta[:, :sr] = theta[:, rev_idx] + theta[:, :, :sr] = theta[:, :, rev_idx] + if (theta.shape[0] == 1) and src_size[0] > 1: + # adds a batch dim to `theta` in order to match `src` + theta = theta.repeat(src_size[0], 1, 1) + if theta.shape[0] != src_size[0]: + raise ValueError( + f"affine and image batch dimension must match, got affine={theta.shape[0]} image={src_size[0]}." + ) + + grid = nn.functional.affine_grid(theta=theta[:, :sr], size=list(dst_size), align_corners=self.align_corners) + dst = nn.functional.grid_sample( + input=src.contiguous(), + grid=grid, + mode=self.mode.value, + padding_mode=self.padding_mode.value, + align_corners=self.align_corners, + ) + return dst diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/layers/utils.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/layers/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..42fac587164d829aa41c16ae71d13670e092ec50 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/layers/utils.py @@ -0,0 +1,116 @@ +# Copyright (c) MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Optional, Tuple, Union + +from monai.networks.layers.factories import Act, Dropout, Norm, Pool, split_args +from monai.utils import has_option + +__all__ = ["get_norm_layer", "get_act_layer", "get_dropout_layer", "get_pool_layer"] + + +def get_norm_layer(name: Union[Tuple, str], spatial_dims: Optional[int] = 1, channels: Optional[int] = 1): + """ + Create a normalization layer instance. 
+ + For example, to create normalization layers: + + .. code-block:: python + + from monai.networks.layers import get_norm_layer + + g_layer = get_norm_layer(name=("group", {"num_groups": 1})) + n_layer = get_norm_layer(name="instance", spatial_dims=2) + + Args: + name: a normalization type string or a tuple of type string and parameters. + spatial_dims: number of spatial dimensions of the input. + channels: number of features/channels when the normalization layer requires this parameter + but it is not specified in the norm parameters. + """ + norm_name, norm_args = split_args(name) + norm_type = Norm[norm_name, spatial_dims] + kw_args = dict(norm_args) + if has_option(norm_type, "num_features") and "num_features" not in kw_args: + kw_args["num_features"] = channels + if has_option(norm_type, "num_channels") and "num_channels" not in kw_args: + kw_args["num_channels"] = channels + return norm_type(**kw_args) + + +def get_act_layer(name: Union[Tuple, str]): + """ + Create an activation layer instance. + + For example, to create activation layers: + + .. code-block:: python + + from monai.networks.layers import get_act_layer + + s_layer = get_act_layer(name="swish") + p_layer = get_act_layer(name=("prelu", {"num_parameters": 1, "init": 0.25})) + + Args: + name: an activation type string or a tuple of type string and parameters. + """ + act_name, act_args = split_args(name) + act_type = Act[act_name] + return act_type(**act_args) + + +def get_dropout_layer(name: Union[Tuple, str, float, int], dropout_dim: Optional[int] = 1): + """ + Create a dropout layer instance. + + For example, to create dropout layers: + + .. code-block:: python + + from monai.networks.layers import get_dropout_layer + + d_layer = get_dropout_layer(name="dropout") + a_layer = get_dropout_layer(name=("alphadropout", {"p": 0.25})) + + Args: + name: a dropout ratio or a tuple of dropout type and parameters. + dropout_dim: the spatial dimension of the dropout operation. + """ + if isinstance(name, (int, float)): + # if dropout was specified simply as a p value, use default name and make a keyword map with the value + drop_name = Dropout.DROPOUT + drop_args = {"p": float(name)} + else: + drop_name, drop_args = split_args(name) + drop_type = Dropout[drop_name, dropout_dim] + return drop_type(**drop_args) + + +def get_pool_layer(name: Union[Tuple, str], spatial_dims: Optional[int] = 1): + """ + Create a pooling layer instance. + + For example, to create adaptiveavg layer: + + .. code-block:: python + + from monai.networks.layers import get_pool_layer + + pool_layer = get_pool_layer(("adaptiveavg", {"output_size": (1, 1, 1)}), spatial_dims=3) + + Args: + name: a pooling type string or a tuple of type string and parameters. + spatial_dims: number of spatial dimensions of the input. 
+ + """ + pool_name, pool_args = split_args(name) + pool_type = Pool[pool_name, spatial_dims] + return pool_type(**pool_args) diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/layers/weight_init.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/layers/weight_init.py new file mode 100644 index 0000000000000000000000000000000000000000..9b81ef17f87b6eae245264337458e2135449ae05 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/layers/weight_init.py @@ -0,0 +1,64 @@ +# Copyright (c) MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math + +import torch + + +def _no_grad_trunc_normal_(tensor, mean, std, a, b): + """Tensor initialization with truncated normal distribution. + Based on: + https://people.sc.fsu.edu/~jburkardt/presentations/truncated_normal.pdf + https://github.com/rwightman/pytorch-image-models + + Args: + tensor: an n-dimensional `torch.Tensor`. + mean: the mean of the normal distribution. + std: the standard deviation of the normal distribution. + a: the minimum cutoff value. + b: the maximum cutoff value. + """ + + def norm_cdf(x): + return (1.0 + math.erf(x / math.sqrt(2.0))) / 2.0 + + with torch.no_grad(): + l = norm_cdf((a - mean) / std) + u = norm_cdf((b - mean) / std) + tensor.uniform_(2 * l - 1, 2 * u - 1) + tensor.erfinv_() + tensor.mul_(std * math.sqrt(2.0)) + tensor.add_(mean) + tensor.clamp_(min=a, max=b) + return tensor + + +def trunc_normal_(tensor, mean=0.0, std=1.0, a=-2.0, b=2.0): + """Tensor initialization with truncated normal distribution. + Based on: + https://github.com/rwightman/pytorch-image-models + + Args: + tensor: an n-dimensional `torch.Tensor` + mean: the mean of the normal distribution + std: the standard deviation of the normal distribution + a: the minimum cutoff value + b: the maximum cutoff value + """ + + if not std > 0: + raise ValueError("the standard deviation should be greater than zero.") + + if a >= b: + raise ValueError("minimum cutoff value (a) should be smaller than maximum cutoff value (b).") + + return _no_grad_trunc_normal_(tensor, mean, std, a, b) diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/__init__.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a85d55769c708d5978de2fd9d83d7b2cd14460ed --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/__init__.py @@ -0,0 +1,91 @@ +# Copyright (c) MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .ahnet import AHnet, Ahnet, AHNet +from .attentionunet import AttentionUnet +from .autoencoder import AutoEncoder +from .basic_unet import BasicUNet, BasicUnet, Basicunet, basicunet +from .classifier import Classifier, Critic, Discriminator +from .densenet import ( + DenseNet, + Densenet, + DenseNet121, + Densenet121, + DenseNet169, + Densenet169, + DenseNet201, + Densenet201, + DenseNet264, + Densenet264, + densenet121, + densenet169, + densenet201, + densenet264, +) +from .dints import DiNTS, TopologyConstruction, TopologyInstance, TopologySearch +from .dynunet import DynUNet, DynUnet, Dynunet +from .efficientnet import ( + BlockArgs, + EfficientNet, + EfficientNetBN, + EfficientNetBNFeatures, + drop_connect, + get_efficientnet_image_size, +) +from .fullyconnectednet import FullyConnectedNet, VarFullyConnectedNet +from .generator import Generator +from .highresnet import HighResBlock, HighResNet +from .milmodel import MILModel +from .netadapter import NetAdapter +from .regressor import Regressor +from .regunet import GlobalNet, LocalNet, RegUNet +from .resnet import ResNet, resnet10, resnet18, resnet34, resnet50, resnet101, resnet152, resnet200 +from .segresnet import SegResNet, SegResNetVAE +from .senet import ( + SENet, + SEnet, + Senet, + SENet154, + SEnet154, + Senet154, + SEResNet50, + SEresnet50, + Seresnet50, + SEResNet101, + SEresnet101, + Seresnet101, + SEResNet152, + SEresnet152, + Seresnet152, + SEResNext50, + SEResNeXt50, + SEresnext50, + Seresnext50, + SEResNext101, + SEResNeXt101, + SEresnext101, + Seresnext101, + senet154, + seresnet50, + seresnet101, + seresnet152, + seresnext50, + seresnext101, +) +from .swin_unetr import SwinUNETR +from .torchvision_fc import TorchVisionFCModel +from .transchex import BertAttention, BertMixedLayer, BertOutput, BertPreTrainedModel, MultiModal, Pooler, Transchex +from .unet import UNet, Unet +from .unetr import UNETR +from .varautoencoder import VarAutoEncoder +from .vit import ViT +from .vitautoenc import ViTAutoEnc +from .vnet import VNet diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/__pycache__/attentionunet.cpython-38.pyc b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/__pycache__/attentionunet.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..04af46e5a6e4775a027094dd15803fdbf8323334 Binary files /dev/null and b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/__pycache__/attentionunet.cpython-38.pyc differ diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/__pycache__/autoencoder.cpython-38.pyc b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/__pycache__/autoencoder.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c299a71aa52e418cfefcb5e69d18c067e4ff2010 Binary files /dev/null and b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/__pycache__/autoencoder.cpython-38.pyc differ diff --git 
a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/__pycache__/basic_unet.cpython-38.pyc b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/__pycache__/basic_unet.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..99a1b80c71828dab617138fe24288a0a57466e85 Binary files /dev/null and b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/__pycache__/basic_unet.cpython-38.pyc differ diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/__pycache__/dynunet.cpython-38.pyc b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/__pycache__/dynunet.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f0b4682d470a47d5ceb7f53570c4d75ea07211e4 Binary files /dev/null and b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/__pycache__/dynunet.cpython-38.pyc differ diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/__pycache__/efficientnet.cpython-38.pyc b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/__pycache__/efficientnet.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0bf9d8c000a474b147ad36cb5a588cf8616bc314 Binary files /dev/null and b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/__pycache__/efficientnet.cpython-38.pyc differ diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/__pycache__/fullyconnectednet.cpython-38.pyc b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/__pycache__/fullyconnectednet.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f240bbb1a47ab6775e1c18bf7fa8cad122604e3b Binary files /dev/null and b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/__pycache__/fullyconnectednet.cpython-38.pyc differ diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/__pycache__/netadapter.cpython-38.pyc b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/__pycache__/netadapter.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..af28430ed0475d990cbc8809c26453e3d86a080b Binary files /dev/null and b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/__pycache__/netadapter.cpython-38.pyc differ diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/__pycache__/regressor.cpython-38.pyc b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/__pycache__/regressor.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..45a74d6f8d003d40a124056f297f8fbbcaa9b180 Binary files /dev/null and b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/__pycache__/regressor.cpython-38.pyc differ diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/__pycache__/regunet.cpython-38.pyc b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/__pycache__/regunet.cpython-38.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..9cf1a76e36aa727a6f94608d5ecd6d2b9db92858 Binary files /dev/null and b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/__pycache__/regunet.cpython-38.pyc differ diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/__pycache__/resnet.cpython-38.pyc b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/__pycache__/resnet.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c99a04b2b476cfbdf32b9cf17bcf4512107f3973 Binary files /dev/null and b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/__pycache__/resnet.cpython-38.pyc differ diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/__pycache__/segresnet.cpython-38.pyc b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/__pycache__/segresnet.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f5241c28311fb71e8c8db4409dcd21f2fd802d40 Binary files /dev/null and b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/__pycache__/segresnet.cpython-38.pyc differ diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/__pycache__/senet.cpython-38.pyc b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/__pycache__/senet.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..56edd42268d8b8e34641a5397f61c364f6f267be Binary files /dev/null and b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/__pycache__/senet.cpython-38.pyc differ diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/__pycache__/swin_unetr.cpython-38.pyc b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/__pycache__/swin_unetr.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..112fb86cea03d11656a43162f5d325cb8f2b82f3 Binary files /dev/null and b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/__pycache__/swin_unetr.cpython-38.pyc differ diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/__pycache__/torchvision_fc.cpython-38.pyc b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/__pycache__/torchvision_fc.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b6ff4325aaa16b9b6dcc9d54664a4b61445ca698 Binary files /dev/null and b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/__pycache__/torchvision_fc.cpython-38.pyc differ diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/__pycache__/unet.cpython-38.pyc b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/__pycache__/unet.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0277aad354a813aeb9103c23dea0e3808894a686 Binary files /dev/null and b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/__pycache__/unet.cpython-38.pyc differ diff --git 
a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/__pycache__/unetr.cpython-38.pyc b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/__pycache__/unetr.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..16a0e88e25a7ab498f8c64dc7b129fce947695f6 Binary files /dev/null and b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/__pycache__/unetr.cpython-38.pyc differ diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/__pycache__/varautoencoder.cpython-38.pyc b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/__pycache__/varautoencoder.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8767bef3851059b80c4e7ddfd90ed6528a19f646 Binary files /dev/null and b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/__pycache__/varautoencoder.cpython-38.pyc differ diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/__pycache__/vit.cpython-38.pyc b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/__pycache__/vit.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..28d0b7199acbe96e849d4b0906f8a41ed468af2a Binary files /dev/null and b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/__pycache__/vit.cpython-38.pyc differ diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/__pycache__/vnet.cpython-38.pyc b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/__pycache__/vnet.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..556db00037df5b20154604691ef8d2a45d102ebb Binary files /dev/null and b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/__pycache__/vnet.cpython-38.pyc differ diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/attentionunet.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/attentionunet.py new file mode 100644 index 0000000000000000000000000000000000000000..177a54e105ba1b8f2c720da92cc587a35aef84aa --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/attentionunet.py @@ -0,0 +1,257 @@ +# Copyright (c) MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from typing import Sequence, Union + +import torch +import torch.nn as nn + +from monai.networks.blocks.convolutions import Convolution +from monai.networks.layers.factories import Norm + +__all__ = ["AttentionUnet"] + + +class ConvBlock(nn.Module): + def __init__( + self, + spatial_dims: int, + in_channels: int, + out_channels: int, + kernel_size: int = 3, + strides: int = 1, + dropout=0.0, + ): + super().__init__() + layers = [ + Convolution( + spatial_dims=spatial_dims, + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + strides=strides, + padding=None, + adn_ordering="NDA", + act="relu", + norm=Norm.BATCH, + dropout=dropout, + ), + Convolution( + spatial_dims=spatial_dims, + in_channels=out_channels, + out_channels=out_channels, + kernel_size=kernel_size, + strides=1, + padding=None, + adn_ordering="NDA", + act="relu", + norm=Norm.BATCH, + dropout=dropout, + ), + ] + self.conv = nn.Sequential(*layers) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + x_c: torch.Tensor = self.conv(x) + return x_c + + +class UpConv(nn.Module): + def __init__(self, spatial_dims: int, in_channels: int, out_channels: int, kernel_size=3, strides=2, dropout=0.0): + super().__init__() + self.up = Convolution( + spatial_dims, + in_channels, + out_channels, + strides=strides, + kernel_size=kernel_size, + act="relu", + adn_ordering="NDA", + norm=Norm.BATCH, + dropout=dropout, + is_transposed=True, + ) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + x_u: torch.Tensor = self.up(x) + return x_u + + +class AttentionBlock(nn.Module): + def __init__(self, spatial_dims: int, f_int: int, f_g: int, f_l: int, dropout=0.0): + super().__init__() + self.W_g = nn.Sequential( + Convolution( + spatial_dims=spatial_dims, + in_channels=f_g, + out_channels=f_int, + kernel_size=1, + strides=1, + padding=0, + dropout=dropout, + conv_only=True, + ), + Norm[Norm.BATCH, spatial_dims](f_int), + ) + + self.W_x = nn.Sequential( + Convolution( + spatial_dims=spatial_dims, + in_channels=f_l, + out_channels=f_int, + kernel_size=1, + strides=1, + padding=0, + dropout=dropout, + conv_only=True, + ), + Norm[Norm.BATCH, spatial_dims](f_int), + ) + + self.psi = nn.Sequential( + Convolution( + spatial_dims=spatial_dims, + in_channels=f_int, + out_channels=1, + kernel_size=1, + strides=1, + padding=0, + dropout=dropout, + conv_only=True, + ), + Norm[Norm.BATCH, spatial_dims](1), + nn.Sigmoid(), + ) + + self.relu = nn.ReLU() + + def forward(self, g: torch.Tensor, x: torch.Tensor) -> torch.Tensor: + g1 = self.W_g(g) + x1 = self.W_x(x) + psi: torch.Tensor = self.relu(g1 + x1) + psi = self.psi(psi) + + return x * psi + + +class AttentionLayer(nn.Module): + def __init__(self, spatial_dims: int, in_channels: int, out_channels: int, submodule: nn.Module, dropout=0.0): + super().__init__() + self.attention = AttentionBlock( + spatial_dims=spatial_dims, f_g=in_channels, f_l=in_channels, f_int=in_channels // 2 + ) + self.upconv = UpConv(spatial_dims=spatial_dims, in_channels=out_channels, out_channels=in_channels, strides=2) + self.merge = Convolution( + spatial_dims=spatial_dims, in_channels=2 * in_channels, out_channels=in_channels, dropout=dropout + ) + self.submodule = submodule + + def forward(self, x: torch.Tensor) -> torch.Tensor: + fromlower = self.upconv(self.submodule(x)) + att = self.attention(g=fromlower, x=x) + att_m: torch.Tensor = self.merge(torch.cat((att, fromlower), dim=1)) + return att_m + + +class AttentionUnet(nn.Module): + """ + Attention Unet based on + Otkay et al. 
"Attention U-Net: Learning Where to Look for the Pancreas" + https://arxiv.org/abs/1804.03999 + + Args: + spatial_dims: number of spatial dimensions of the input image. + in_channels: number of the input channel. + out_channels: number of the output classes. + channels (Sequence[int]): sequence of channels. Top block first. The length of `channels` should be no less than 2. + strides (Sequence[int]): stride to use for convolutions. + kernel_size: convolution kernel size. + upsample_kernel_size: convolution kernel size for transposed convolution layers. + dropout: dropout ratio. Defaults to no dropout. + """ + + def __init__( + self, + spatial_dims: int, + in_channels: int, + out_channels: int, + channels: Sequence[int], + strides: Sequence[int], + kernel_size: Union[Sequence[int], int] = 3, + up_kernel_size: Union[Sequence[int], int] = 3, + dropout: float = 0.0, + ): + super().__init__() + self.dimensions = spatial_dims + self.in_channels = in_channels + self.out_channels = out_channels + self.channels = channels + self.strides = strides + self.kernel_size = kernel_size + self.dropout = dropout + + head = ConvBlock(spatial_dims=spatial_dims, in_channels=in_channels, out_channels=channels[0], dropout=dropout) + reduce_channels = Convolution( + spatial_dims=spatial_dims, + in_channels=channels[0], + out_channels=out_channels, + kernel_size=1, + strides=1, + padding=0, + conv_only=True, + ) + self.up_kernel_size = up_kernel_size + + def _create_block(channels: Sequence[int], strides: Sequence[int], level: int = 0) -> nn.Module: + if len(channels) > 2: + subblock = _create_block(channels[1:], strides[1:], level=level + 1) + return AttentionLayer( + spatial_dims=spatial_dims, + in_channels=channels[0], + out_channels=channels[1], + submodule=nn.Sequential( + ConvBlock( + spatial_dims=spatial_dims, + in_channels=channels[0], + out_channels=channels[1], + strides=strides[0], + dropout=self.dropout, + ), + subblock, + ), + dropout=dropout, + ) + else: + # the next layer is the bottom so stop recursion, + # create the bottom layer as the sublock for this layer + return self._get_bottom_layer(channels[0], channels[1], strides[0], level=level + 1) + + encdec = _create_block(self.channels, self.strides) + self.model = nn.Sequential(head, encdec, reduce_channels) + + def _get_bottom_layer(self, in_channels: int, out_channels: int, strides: int, level: int) -> nn.Module: + return AttentionLayer( + spatial_dims=self.dimensions, + in_channels=in_channels, + out_channels=out_channels, + submodule=ConvBlock( + spatial_dims=self.dimensions, + in_channels=in_channels, + out_channels=out_channels, + strides=strides, + dropout=self.dropout, + ), + dropout=self.dropout, + ) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + x_m: torch.Tensor = self.model(x) + return x_m diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/autoencoder.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/autoencoder.py new file mode 100644 index 0000000000000000000000000000000000000000..75edde70ebb368d59f27ff4a1219afd87906f21a --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/autoencoder.py @@ -0,0 +1,301 @@ +# Copyright (c) MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Any, Optional, Sequence, Tuple, Union
+
+import torch
+import torch.nn as nn
+
+from monai.networks.blocks import Convolution, ResidualUnit
+from monai.networks.layers.factories import Act, Norm
+from monai.utils import deprecated_arg
+
+__all__ = ["AutoEncoder"]
+
+
+class AutoEncoder(nn.Module):
+    """
+    Simple definition of an autoencoder and base class for the architecture implementing
+    :py:class:`monai.networks.nets.VarAutoEncoder`. The network is composed of an encode sequence of blocks, followed
+    by an intermediary sequence of blocks, and finally a decode sequence of blocks. The encode and decode blocks are
+    default :py:class:`monai.networks.blocks.Convolution` instances with the encode blocks having the given stride
+    and the decode blocks having transpose convolutions with the same stride. If `num_res_units` is given, residual
+    blocks are used instead.
+
+    By default the intermediary sequence is empty, but if `inter_channels` is given to specify the output channels of
+    blocks then this will become a sequence of Convolution blocks, or of residual blocks if `num_inter_units` is
+    given. The optional parameter `inter_dilations` can be used to specify the dilation values of the convolutions in
+    these blocks; this allows a network to use dilated kernels in this middle section. Since the intermediary section
+    isn't meant to change the size of the output, the strides for all these kernels are 1.
+
+    Args:
+        spatial_dims: number of spatial dimensions.
+        in_channels: number of input channels.
+        out_channels: number of output channels.
+        channels: sequence of channels. Top block first. The length of `channels` should be no less than 2.
+        strides: sequence of convolution strides. The length of `strides` should equal `len(channels) - 1`.
+        kernel_size: convolution kernel size, the value(s) should be odd. If sequence,
+            its length should equal the number of spatial dimensions. Defaults to 3.
+        up_kernel_size: upsampling convolution kernel size, the value(s) should be odd. If sequence,
+            its length should equal the number of spatial dimensions. Defaults to 3.
+        num_res_units: number of residual units. Defaults to 0.
+        inter_channels: sequence of channels defining the blocks in the intermediate layer between encode and decode.
+        inter_dilations: defines the dilation value for each block of the intermediate layer. Defaults to 1.
+        num_inter_units: number of residual units for each block of the intermediate layer. Defaults to 0.
+        act: activation type and arguments. Defaults to PReLU.
+        norm: feature normalization type and arguments. Defaults to instance norm.
+        dropout: dropout ratio. Defaults to no dropout.
+        bias: whether to have a bias term in convolution blocks. Defaults to True.
+            According to the PyTorch Performance Tuning Guide,
+            if a conv layer is directly followed by a batch norm layer, bias should be False.
+
+    .. deprecated:: 0.6.0
+        ``dimensions`` is deprecated, use ``spatial_dims`` instead.
+ + Examples:: + + from monai.networks.nets import AutoEncoder + + # 3 layers each down/up sampling their inputs by a factor 2 with no intermediate layer + net = AutoEncoder( + spatial_dims=2, + in_channels=1, + out_channels=1, + channels=(2, 4, 8), + strides=(2, 2, 2) + ) + + # 1 layer downsampling by 2, followed by a sequence of residual units with 2 convolutions defined by + # progressively increasing dilations, then final upsample layer + net = AutoEncoder( + spatial_dims=2, + in_channels=1, + out_channels=1, + channels=(4,), + strides=(2,), + inter_channels=(8, 8, 8), + inter_dilations=(1, 2, 4), + num_inter_units=2 + ) + + """ + + @deprecated_arg( + name="dimensions", new_name="spatial_dims", since="0.6", msg_suffix="Please use `spatial_dims` instead." + ) + def __init__( + self, + spatial_dims: int, + in_channels: int, + out_channels: int, + channels: Sequence[int], + strides: Sequence[int], + kernel_size: Union[Sequence[int], int] = 3, + up_kernel_size: Union[Sequence[int], int] = 3, + num_res_units: int = 0, + inter_channels: Optional[list] = None, + inter_dilations: Optional[list] = None, + num_inter_units: int = 2, + act: Optional[Union[Tuple, str]] = Act.PRELU, + norm: Union[Tuple, str] = Norm.INSTANCE, + dropout: Optional[Union[Tuple, str, float]] = None, + bias: bool = True, + dimensions: Optional[int] = None, + ) -> None: + + super().__init__() + self.dimensions = spatial_dims if dimensions is None else dimensions + self.in_channels = in_channels + self.out_channels = out_channels + self.channels = list(channels) + self.strides = list(strides) + self.kernel_size = kernel_size + self.up_kernel_size = up_kernel_size + self.num_res_units = num_res_units + self.act = act + self.norm = norm + self.dropout = dropout + self.bias = bias + self.num_inter_units = num_inter_units + self.inter_channels = inter_channels if inter_channels is not None else [] + self.inter_dilations = list(inter_dilations or [1] * len(self.inter_channels)) + + # The number of channels and strides should match + if len(channels) != len(strides): + raise ValueError("Autoencoder expects matching number of channels and strides") + + self.encoded_channels = in_channels + decode_channel_list = list(channels[-2::-1]) + [out_channels] + + self.encode, self.encoded_channels = self._get_encode_module(self.encoded_channels, channels, strides) + self.intermediate, self.encoded_channels = self._get_intermediate_module(self.encoded_channels, num_inter_units) + self.decode, _ = self._get_decode_module(self.encoded_channels, decode_channel_list, strides[::-1] or [1]) + + def _get_encode_module( + self, in_channels: int, channels: Sequence[int], strides: Sequence[int] + ) -> Tuple[nn.Sequential, int]: + """ + Returns the encode part of the network by building up a sequence of layers returned by `_get_encode_layer`. + """ + encode = nn.Sequential() + layer_channels = in_channels + + for i, (c, s) in enumerate(zip(channels, strides)): + layer = self._get_encode_layer(layer_channels, c, s, False) + encode.add_module("encode_%i" % i, layer) + layer_channels = c + + return encode, layer_channels + + def _get_intermediate_module(self, in_channels: int, num_inter_units: int) -> Tuple[nn.Module, int]: + """ + Returns the intermediate block of the network which accepts input from the encoder and whose output goes + to the decoder. 
+ """ + # Define some types + intermediate: nn.Module + unit: nn.Module + + intermediate = nn.Identity() + layer_channels = in_channels + + if self.inter_channels: + intermediate = nn.Sequential() + + for i, (dc, di) in enumerate(zip(self.inter_channels, self.inter_dilations)): + if self.num_inter_units > 0: + unit = ResidualUnit( + spatial_dims=self.dimensions, + in_channels=layer_channels, + out_channels=dc, + strides=1, + kernel_size=self.kernel_size, + subunits=self.num_inter_units, + act=self.act, + norm=self.norm, + dropout=self.dropout, + dilation=di, + bias=self.bias, + ) + else: + unit = Convolution( + spatial_dims=self.dimensions, + in_channels=layer_channels, + out_channels=dc, + strides=1, + kernel_size=self.kernel_size, + act=self.act, + norm=self.norm, + dropout=self.dropout, + dilation=di, + bias=self.bias, + ) + + intermediate.add_module("inter_%i" % i, unit) + layer_channels = dc + + return intermediate, layer_channels + + def _get_decode_module( + self, in_channels: int, channels: Sequence[int], strides: Sequence[int] + ) -> Tuple[nn.Sequential, int]: + """ + Returns the decode part of the network by building up a sequence of layers returned by `_get_decode_layer`. + """ + decode = nn.Sequential() + layer_channels = in_channels + + for i, (c, s) in enumerate(zip(channels, strides)): + layer = self._get_decode_layer(layer_channels, c, s, i == (len(strides) - 1)) + decode.add_module("decode_%i" % i, layer) + layer_channels = c + + return decode, layer_channels + + def _get_encode_layer(self, in_channels: int, out_channels: int, strides: int, is_last: bool) -> nn.Module: + """ + Returns a single layer of the encoder part of the network. + """ + mod: nn.Module + if self.num_res_units > 0: + mod = ResidualUnit( + spatial_dims=self.dimensions, + in_channels=in_channels, + out_channels=out_channels, + strides=strides, + kernel_size=self.kernel_size, + subunits=self.num_res_units, + act=self.act, + norm=self.norm, + dropout=self.dropout, + bias=self.bias, + last_conv_only=is_last, + ) + mod = Convolution( + spatial_dims=self.dimensions, + in_channels=in_channels, + out_channels=out_channels, + strides=strides, + kernel_size=self.kernel_size, + act=self.act, + norm=self.norm, + dropout=self.dropout, + bias=self.bias, + conv_only=is_last, + ) + return mod + + def _get_decode_layer(self, in_channels: int, out_channels: int, strides: int, is_last: bool) -> nn.Sequential: + """ + Returns a single layer of the decoder part of the network. 
+ """ + decode = nn.Sequential() + + conv = Convolution( + spatial_dims=self.dimensions, + in_channels=in_channels, + out_channels=out_channels, + strides=strides, + kernel_size=self.up_kernel_size, + act=self.act, + norm=self.norm, + dropout=self.dropout, + bias=self.bias, + conv_only=is_last and self.num_res_units == 0, + is_transposed=True, + ) + + decode.add_module("conv", conv) + + if self.num_res_units > 0: + ru = ResidualUnit( + spatial_dims=self.dimensions, + in_channels=out_channels, + out_channels=out_channels, + strides=1, + kernel_size=self.kernel_size, + subunits=1, + act=self.act, + norm=self.norm, + dropout=self.dropout, + bias=self.bias, + last_conv_only=is_last, + ) + + decode.add_module("resunit", ru) + + return decode + + def forward(self, x: torch.Tensor) -> Any: + x = self.encode(x) + x = self.intermediate(x) + x = self.decode(x) + return x diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/classifier.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/classifier.py new file mode 100644 index 0000000000000000000000000000000000000000..7f4e43eedbd958984ad58eeae15e09d509b71605 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/classifier.py @@ -0,0 +1,139 @@ +# Copyright (c) MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Optional, Sequence, Union + +import torch +import torch.nn as nn + +from monai.networks.layers.factories import Act, Norm, split_args +from monai.networks.nets.regressor import Regressor + +__all__ = ["Classifier", "Discriminator", "Critic"] + + +class Classifier(Regressor): + """ + Defines a classification network from Regressor by specifying the output shape as a single dimensional tensor + with size equal to the number of classes to predict. The final activation function can also be specified, eg. + softmax or sigmoid. 
+ + Args: + in_shape: tuple of integers stating the dimension of the input tensor (minus batch dimension) + classes: integer stating the dimension of the final output tensor + channels: tuple of integers stating the output channels of each convolutional layer + strides: tuple of integers stating the stride (downscale factor) of each convolutional layer + kernel_size: integer or tuple of integers stating size of convolutional kernels + num_res_units: integer stating number of convolutions in residual units, 0 means no residual units + act: name or type defining activation layers + norm: name or type defining normalization layers + dropout: optional float value in range [0, 1] stating dropout probability for layers, None for no dropout + bias: boolean stating if convolution layers should have a bias component + last_act: name defining the last activation layer + """ + + def __init__( + self, + in_shape: Sequence[int], + classes: int, + channels: Sequence[int], + strides: Sequence[int], + kernel_size: Union[Sequence[int], int] = 3, + num_res_units: int = 2, + act=Act.PRELU, + norm=Norm.INSTANCE, + dropout: Optional[float] = None, + bias: bool = True, + last_act: Optional[str] = None, + ) -> None: + super().__init__(in_shape, (classes,), channels, strides, kernel_size, num_res_units, act, norm, dropout, bias) + + if last_act is not None: + last_act_name, last_act_args = split_args(last_act) + last_act_type = Act[last_act_name] + + self.final.add_module("lastact", last_act_type(**last_act_args)) + + +class Discriminator(Classifier): + """ + Defines a discriminator network from Classifier with a single output value and sigmoid activation by default. This + is meant for use with GANs or other applications requiring a generic discriminator network. + + Args: + in_shape: tuple of integers stating the dimension of the input tensor (minus batch dimension) + channels: tuple of integers stating the output channels of each convolutional layer + strides: tuple of integers stating the stride (downscale factor) of each convolutional layer + kernel_size: integer or tuple of integers stating size of convolutional kernels + num_res_units: integer stating number of convolutions in residual units, 0 means no residual units + act: name or type defining activation layers + norm: name or type defining normalization layers + dropout: optional float value in range [0, 1] stating dropout probability for layers, None for no dropout + bias: boolean stating if convolution layers should have a bias component + last_act: name defining the last activation layer + """ + + def __init__( + self, + in_shape: Sequence[int], + channels: Sequence[int], + strides: Sequence[int], + kernel_size: Union[Sequence[int], int] = 3, + num_res_units: int = 2, + act=Act.PRELU, + norm=Norm.INSTANCE, + dropout: Optional[float] = 0.25, + bias: bool = True, + last_act=Act.SIGMOID, + ) -> None: + super().__init__(in_shape, 1, channels, strides, kernel_size, num_res_units, act, norm, dropout, bias, last_act) + + +class Critic(Classifier): + """ + Defines a critic network from Classifier with a single output value and no final activation. The final layer is + `nn.Flatten` instead of `nn.Linear`, the final result is computed as the mean over the first dimension. This is + meant to be used with Wasserstein GANs. 
+ + Args: + in_shape: tuple of integers stating the dimension of the input tensor (minus batch dimension) + channels: tuple of integers stating the output channels of each convolutional layer + strides: tuple of integers stating the stride (downscale factor) of each convolutional layer + kernel_size: integer or tuple of integers stating size of convolutional kernels + num_res_units: integer stating number of convolutions in residual units, 0 means no residual units + act: name or type defining activation layers + norm: name or type defining normalization layers + dropout: optional float value in range [0, 1] stating dropout probability for layers, None for no dropout + bias: boolean stating if convolution layers should have a bias component + """ + + def __init__( + self, + in_shape: Sequence[int], + channels: Sequence[int], + strides: Sequence[int], + kernel_size: Union[Sequence[int], int] = 3, + num_res_units: int = 2, + act=Act.PRELU, + norm=Norm.INSTANCE, + dropout: Optional[float] = 0.25, + bias: bool = True, + ) -> None: + super().__init__(in_shape, 1, channels, strides, kernel_size, num_res_units, act, norm, dropout, bias, None) + + def _get_final_layer(self, in_shape: Sequence[int]): + return nn.Flatten() + + def forward(self, x: torch.Tensor) -> torch.Tensor: + x = self.net(x) + x = self.final(x) + x = x.mean(1) + return x.view((x.shape[0], -1)) diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/dints.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/dints.py new file mode 100644 index 0000000000000000000000000000000000000000..b7f3921a477f555b8e03cc80bb0dc07c514aaf0a --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/dints.py @@ -0,0 +1,1040 @@ +# Copyright (c) MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
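# A minimal usage sketch for the Classifier / Discriminator / Critic heads defined above
# (assuming the standard monai.networks.nets import path; the input shape and channel choices
# are illustrative only):

import torch
from monai.networks.nets import Classifier, Critic, Discriminator

x = torch.randn(4, 1, 64, 64)  # batch of 4 single-channel 64x64 images

clf = Classifier(in_shape=(1, 64, 64), classes=3, channels=(8, 16, 32), strides=(2, 2, 2))
disc = Discriminator(in_shape=(1, 64, 64), channels=(8, 16, 32), strides=(2, 2, 2))
critic = Critic(in_shape=(1, 64, 64), channels=(8, 16, 32), strides=(2, 2, 2))

print(clf(x).shape)     # (4, 3): raw class scores; set last_act to append a final activation
print(disc(x).shape)    # (4, 1): sigmoid output in [0, 1]
print(critic(x).shape)  # (4, 1): unbounded realness score, as used for Wasserstein GAN training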
+ + +import warnings +from typing import List, Optional, Tuple, Union + +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F + +from monai.networks.blocks.dints_block import ( + ActiConvNormBlock, + FactorizedIncreaseBlock, + FactorizedReduceBlock, + P3DActiConvNormBlock, +) +from monai.networks.layers.factories import Conv +from monai.networks.layers.utils import get_act_layer, get_norm_layer +from monai.utils import optional_import + +# solving shortest path problem +csr_matrix, _ = optional_import("scipy.sparse", name="csr_matrix") +dijkstra, _ = optional_import("scipy.sparse.csgraph", name="dijkstra") + +__all__ = ["DiNTS", "TopologyConstruction", "TopologyInstance", "TopologySearch"] + + +@torch.jit.interface +class CellInterface(torch.nn.Module): + """interface for torchscriptable Cell""" + + def forward(self, x: torch.Tensor, weight: torch.Tensor) -> torch.Tensor: + pass + + +@torch.jit.interface +class StemInterface(torch.nn.Module): + """interface for torchscriptable Stem""" + + def forward(self, x: torch.Tensor) -> torch.Tensor: + pass + + +class StemTS(StemInterface): + """wrapper for torchscriptable Stem""" + + def __init__(self, *mod): + super().__init__() + self.mod = torch.nn.Sequential(*mod) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + return self.mod(x) # type: ignore + + +def _dfs(node, paths): + """use depth first search to find all path activation combination""" + if node == paths: + return [[0], [1]] + child = _dfs(node + 1, paths) + return [[0] + _ for _ in child] + [[1] + _ for _ in child] + + +class _IdentityWithRAMCost(nn.Identity): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.ram_cost = 0 + + +class _CloseWithRAMCost(nn.Module): + def __init__(self): + super().__init__() + self.ram_cost = 0 + + def forward(self, x): + return torch.tensor(0.0, requires_grad=False).to(x) + + +class _ActiConvNormBlockWithRAMCost(ActiConvNormBlock): + """The class wraps monai layers with ram estimation. The ram_cost = total_ram/output_size is estimated. 
+ Here is the estimation: + feature_size = output_size/out_channel + total_ram = ram_cost * output_size + total_ram = in_channel * feature_size (activation map) + + in_channel * feature_size (convolution map) + + out_channel * feature_size (normalization) + = (2*in_channel + out_channel) * output_size/out_channel + ram_cost = total_ram/output_size = 2 * in_channel/out_channel + 1 + """ + + def __init__( + self, + in_channel: int, + out_channel: int, + kernel_size: int, + padding: int, + spatial_dims: int = 3, + act_name: Union[Tuple, str] = "RELU", + norm_name: Union[Tuple, str] = ("INSTANCE", {"affine": True}), + ): + super().__init__(in_channel, out_channel, kernel_size, padding, spatial_dims, act_name, norm_name) + self.ram_cost = 1 + in_channel / out_channel * 2 + + +class _P3DActiConvNormBlockWithRAMCost(P3DActiConvNormBlock): + def __init__( + self, + in_channel: int, + out_channel: int, + kernel_size: int, + padding: int, + p3dmode: int = 0, + act_name: Union[Tuple, str] = "RELU", + norm_name: Union[Tuple, str] = ("INSTANCE", {"affine": True}), + ): + super().__init__(in_channel, out_channel, kernel_size, padding, p3dmode, act_name, norm_name) + # 1 in_channel (activation) + 1 in_channel (convolution) + + # 1 out_channel (convolution) + 1 out_channel (normalization) + self.ram_cost = 2 + 2 * in_channel / out_channel + + +class _FactorizedIncreaseBlockWithRAMCost(FactorizedIncreaseBlock): + def __init__( + self, + in_channel: int, + out_channel: int, + spatial_dims: int = 3, + act_name: Union[Tuple, str] = "RELU", + norm_name: Union[Tuple, str] = ("INSTANCE", {"affine": True}), + ): + super().__init__(in_channel, out_channel, spatial_dims, act_name, norm_name) + # s0 is upsampled 2x from s1, representing feature sizes at two resolutions. + # 2 * in_channel * s0 (upsample + activation) + 2 * out_channel * s0 (conv + normalization) + # s0 = output_size/out_channel + self.ram_cost = 2 * in_channel / out_channel + 2 + + +class _FactorizedReduceBlockWithRAMCost(FactorizedReduceBlock): + def __init__( + self, + in_channel: int, + out_channel: int, + spatial_dims: int = 3, + act_name: Union[Tuple, str] = "RELU", + norm_name: Union[Tuple, str] = ("INSTANCE", {"affine": True}), + ): + super().__init__(in_channel, out_channel, spatial_dims, act_name, norm_name) + # s0 is upsampled 2x from s1, representing feature sizes at two resolutions. + # in_channel * s0 (activation) + 3 * out_channel * s1 (convolution, concatenation, normalization) + # s0 = s1 * 2^(spatial_dims) = output_size / out_channel * 2^(spatial_dims) + self.ram_cost = in_channel / out_channel * 2**self._spatial_dims + 3 + + +class MixedOp(nn.Module): + """ + The weighted averaging of cell operations. + Args: + c: number of output channels. + ops: a dictionary of operations. See also: ``Cell.OPS2D`` or ``Cell.OPS3D``. + arch_code_c: binary cell operation code. It represents the operation results added to the output. + """ + + def __init__(self, c: int, ops: dict, arch_code_c=None): + super().__init__() + if arch_code_c is None: + arch_code_c = np.ones(len(ops)) + self.ops = nn.ModuleList() + for arch_c, op_name in zip(arch_code_c, ops): + self.ops.append(_CloseWithRAMCost() if arch_c == 0 else ops[op_name](c)) + + def forward(self, x: torch.Tensor, weight: torch.Tensor): + """ + Args: + x: input tensor. + weight: learnable architecture weights for cell operations. arch_code_c are derived from it. + Return: + out: weighted average of the operation results. 
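        Note: the result is equivalent to ``out = sum(weight[i] * ops[i](x) for i in range(len(ops)))``,
        where operations disabled by ``arch_code_c`` contribute a zero tensor.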
+ """ + out = 0.0 + weight = weight.to(x) + for idx, _op in enumerate(self.ops): + out = out + _op(x) * weight[idx] + return out + + +class Cell(CellInterface): + """ + The basic class for cell operation search, which contains a preprocessing operation and a mixed cell operation. + Each cell is defined on a `path` in the topology search space. + Args: + c_prev: number of input channels + c: number of output channels + rate: resolution change rate. It represents the preprocessing operation before the mixed cell operation. + ``-1`` for 2x downsample, ``1`` for 2x upsample, ``0`` for no change of resolution. + arch_code_c: cell operation code + """ + + DIRECTIONS = 3 + # Possible output paths for `Cell`. + # + # - UpSample + # / + # +--+/ + # | |--- Identity or AlignChannels + # +--+\ + # \ + # - Downsample + + # Define 2D operation set, parameterized by the number of channels + OPS2D = { + "skip_connect": lambda _c: _IdentityWithRAMCost(), + "conv_3x3": lambda c: _ActiConvNormBlockWithRAMCost(c, c, 3, padding=1, spatial_dims=2), + } + + # Define 3D operation set, parameterized by the number of channels + OPS3D = { + "skip_connect": lambda _c: _IdentityWithRAMCost(), + "conv_3x3x3": lambda c: _ActiConvNormBlockWithRAMCost(c, c, 3, padding=1, spatial_dims=3), + "conv_3x3x1": lambda c: _P3DActiConvNormBlockWithRAMCost(c, c, 3, padding=1, p3dmode=0), + "conv_3x1x3": lambda c: _P3DActiConvNormBlockWithRAMCost(c, c, 3, padding=1, p3dmode=1), + "conv_1x3x3": lambda c: _P3DActiConvNormBlockWithRAMCost(c, c, 3, padding=1, p3dmode=2), + } + + # Define connection operation set, parameterized by the number of channels + ConnOPS = { + "up": _FactorizedIncreaseBlockWithRAMCost, + "down": _FactorizedReduceBlockWithRAMCost, + "identity": _IdentityWithRAMCost, + "align_channels": _ActiConvNormBlockWithRAMCost, + } + + def __init__( + self, + c_prev: int, + c: int, + rate: int, + arch_code_c=None, + spatial_dims: int = 3, + act_name: Union[Tuple, str] = "RELU", + norm_name: Union[Tuple, str] = ("INSTANCE", {"affine": True}), + ): + super().__init__() + self._spatial_dims = spatial_dims + self._act_name = act_name + self._norm_name = norm_name + + if rate == -1: # downsample + self.preprocess = self.ConnOPS["down"]( + c_prev, c, spatial_dims=self._spatial_dims, act_name=self._act_name, norm_name=self._norm_name + ) + elif rate == 1: # upsample + self.preprocess = self.ConnOPS["up"]( + c_prev, c, spatial_dims=self._spatial_dims, act_name=self._act_name, norm_name=self._norm_name + ) + else: + if c_prev == c: + self.preprocess = self.ConnOPS["identity"]() + else: + self.preprocess = self.ConnOPS["align_channels"]( + c_prev, c, 1, 0, spatial_dims=self._spatial_dims, act_name=self._act_name, norm_name=self._norm_name + ) + + # Define 2D operation set, parameterized by the number of channels + self.OPS2D = { + "skip_connect": lambda _c: _IdentityWithRAMCost(), + "conv_3x3": lambda c: _ActiConvNormBlockWithRAMCost( + c, c, 3, padding=1, spatial_dims=2, act_name=self._act_name, norm_name=self._norm_name + ), + } + + # Define 3D operation set, parameterized by the number of channels + self.OPS3D = { + "skip_connect": lambda _c: _IdentityWithRAMCost(), + "conv_3x3x3": lambda c: _ActiConvNormBlockWithRAMCost( + c, c, 3, padding=1, spatial_dims=3, act_name=self._act_name, norm_name=self._norm_name + ), + "conv_3x3x1": lambda c: _P3DActiConvNormBlockWithRAMCost( + c, c, 3, padding=1, p3dmode=0, act_name=self._act_name, norm_name=self._norm_name + ), + "conv_3x1x3": lambda c: _P3DActiConvNormBlockWithRAMCost( + c, c, 3, 
padding=1, p3dmode=1, act_name=self._act_name, norm_name=self._norm_name + ), + "conv_1x3x3": lambda c: _P3DActiConvNormBlockWithRAMCost( + c, c, 3, padding=1, p3dmode=2, act_name=self._act_name, norm_name=self._norm_name + ), + } + + self.OPS = {} + if self._spatial_dims == 2: + self.OPS = self.OPS2D + elif self._spatial_dims == 3: + self.OPS = self.OPS3D + else: + raise NotImplementedError(f"Spatial dimensions {self._spatial_dims} is not supported.") + + self.op = MixedOp(c, self.OPS, arch_code_c) + + def forward(self, x: torch.Tensor, weight: torch.Tensor) -> torch.Tensor: + """ + Args: + x: input tensor + weight: weights for different operations. + """ + x = self.preprocess(x) + x = self.op(x, weight) + return x + + +class DiNTS(nn.Module): + """ + Reimplementation of DiNTS based on + "DiNTS: Differentiable Neural Network Topology Search for 3D Medical Image Segmentation + ". + + The model contains a pre-defined multi-resolution stem block (defined in this class) and a + DiNTS space (defined in :py:class:`monai.networks.nets.TopologyInstance` and + :py:class:`monai.networks.nets.TopologySearch`). + + The stem block is for: 1) input downsample and 2) output upsample to original size. + The model downsamples the input image by 2 (if ``use_downsample=True``). + The downsampled image is downsampled by [1, 2, 4, 8] times (``num_depths=4``) and used as input to the + DiNTS search space (``TopologySearch``) or the DiNTS instance (``TopologyInstance``). + + - ``TopologyInstance`` is the final searched model. The initialization requires the searched architecture codes. + - ``TopologySearch`` is a multi-path topology and cell operation search space. + The architecture codes will be initialized as one. + - ``TopologyConstruction`` is the parent class which constructs the instance and search space. + + To meet the requirements of the structure, the input size for each spatial dimension should be: + divisible by 2 ** (num_depths + 1). + + Args: + dints_space: DiNTS search space. The value should be instance of `TopologyInstance` or `TopologySearch`. + in_channels: number of input image channels. + num_classes: number of output segmentation classes. + act_name: activation name, default to 'RELU'. + norm_name: normalization used in convolution blocks. Default to `InstanceNorm`. + spatial_dims: spatial 2D or 3D inputs. + use_downsample: use downsample in the stem. + If ``False``, the search space will be in resolution [1, 1/2, 1/4, 1/8], + if ``True``, the search space will be in resolution [1/2, 1/4, 1/8, 1/16]. + node_a: node activation numpy matrix. Its shape is `(num_depths, num_blocks + 1)`. + +1 for multi-resolution inputs. + In model searching stage, ``node_a`` can be None. In deployment stage, ``node_a`` cannot be None. 
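    Example (a minimal construction sketch; the channel/depth values are illustrative and the import
    path assumes the public ``monai.networks.nets`` interface)::

        import torch
        from monai.networks.nets import DiNTS, TopologySearch

        search_space = TopologySearch(
            channel_mul=0.5, num_blocks=8, num_depths=4, use_downsample=True, spatial_dims=3
        )
        model = DiNTS(dints_space=search_space, in_channels=1, num_classes=3, spatial_dims=3, use_downsample=True)

        # with num_depths=4 and use_downsample=True, each spatial size must be divisible by 2 ** (4 + 1) = 32
        prediction = model(torch.randn(1, 1, 96, 96, 96))  # -> (1, 3, 96, 96, 96)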
+ """ + + def __init__( + self, + dints_space, + in_channels: int, + num_classes: int, + act_name: Union[Tuple, str] = "RELU", + norm_name: Union[Tuple, str] = ("INSTANCE", {"affine": True}), + spatial_dims: int = 3, + use_downsample: bool = True, + node_a=None, + ): + super().__init__() + + self.dints_space = dints_space + self.filter_nums = dints_space.filter_nums + self.num_blocks = dints_space.num_blocks + self.num_depths = dints_space.num_depths + if spatial_dims not in (2, 3): + raise NotImplementedError(f"Spatial dimensions {spatial_dims} is not supported.") + self._spatial_dims = spatial_dims + if node_a is None: + self.node_a = torch.ones((self.num_blocks + 1, self.num_depths)) + else: + self.node_a = node_a + + # define stem operations for every block + conv_type = Conv[Conv.CONV, spatial_dims] + self.stem_down = nn.ModuleDict() + self.stem_up = nn.ModuleDict() + self.stem_finals = nn.Sequential( + ActiConvNormBlock( + self.filter_nums[0], + self.filter_nums[0], + act_name=act_name, + norm_name=norm_name, + spatial_dims=spatial_dims, + ), + conv_type( + in_channels=self.filter_nums[0], + out_channels=num_classes, + kernel_size=1, + stride=1, + padding=0, + groups=1, + bias=True, + dilation=1, + ), + ) + mode = "trilinear" if self._spatial_dims == 3 else "bilinear" + for res_idx in range(self.num_depths): + # define downsample stems before DiNTS search + if use_downsample: + self.stem_down[str(res_idx)] = StemTS( + nn.Upsample(scale_factor=1 / (2**res_idx), mode=mode, align_corners=True), + conv_type( + in_channels=in_channels, + out_channels=self.filter_nums[res_idx], + kernel_size=3, + stride=1, + padding=1, + groups=1, + bias=False, + dilation=1, + ), + get_norm_layer(name=norm_name, spatial_dims=spatial_dims, channels=self.filter_nums[res_idx]), + get_act_layer(name=act_name), + conv_type( + in_channels=self.filter_nums[res_idx], + out_channels=self.filter_nums[res_idx + 1], + kernel_size=3, + stride=2, + padding=1, + groups=1, + bias=False, + dilation=1, + ), + get_norm_layer(name=norm_name, spatial_dims=spatial_dims, channels=self.filter_nums[res_idx + 1]), + ) + self.stem_up[str(res_idx)] = StemTS( + get_act_layer(name=act_name), + conv_type( + in_channels=self.filter_nums[res_idx + 1], + out_channels=self.filter_nums[res_idx], + kernel_size=3, + stride=1, + padding=1, + groups=1, + bias=False, + dilation=1, + ), + get_norm_layer(name=norm_name, spatial_dims=spatial_dims, channels=self.filter_nums[res_idx]), + nn.Upsample(scale_factor=2, mode=mode, align_corners=True), + ) + + else: + self.stem_down[str(res_idx)] = StemTS( + nn.Upsample(scale_factor=1 / (2**res_idx), mode=mode, align_corners=True), + conv_type( + in_channels=in_channels, + out_channels=self.filter_nums[res_idx], + kernel_size=3, + stride=1, + padding=1, + groups=1, + bias=False, + dilation=1, + ), + get_norm_layer(name=norm_name, spatial_dims=spatial_dims, channels=self.filter_nums[res_idx]), + ) + self.stem_up[str(res_idx)] = StemTS( + get_act_layer(name=act_name), + conv_type( + in_channels=self.filter_nums[res_idx], + out_channels=self.filter_nums[max(res_idx - 1, 0)], + kernel_size=3, + stride=1, + padding=1, + groups=1, + bias=False, + dilation=1, + ), + get_norm_layer( + name=norm_name, spatial_dims=spatial_dims, channels=self.filter_nums[max(res_idx - 1, 0)] + ), + nn.Upsample(scale_factor=2 ** (res_idx != 0), mode=mode, align_corners=True), + ) + + def weight_parameters(self): + return [param for name, param in self.named_parameters()] + + def forward(self, x: torch.Tensor): + """ + Prediction based 
on dynamic arch_code. + + Args: + x: input tensor. + """ + inputs = [] + for d in range(self.num_depths): + # allow multi-resolution input + _mod_w: StemInterface = self.stem_down[str(d)] + x_out = _mod_w.forward(x) + if self.node_a[0][d]: + inputs.append(x_out) + else: + inputs.append(torch.zeros_like(x_out)) + + outputs = self.dints_space(inputs) + + blk_idx = self.num_blocks - 1 + start = False + _temp: torch.Tensor = torch.empty(0) + for res_idx in range(self.num_depths - 1, -1, -1): + _mod_up: StemInterface = self.stem_up[str(res_idx)] + if start: + _temp = _mod_up.forward(outputs[res_idx] + _temp) + elif self.node_a[blk_idx + 1][res_idx]: + start = True + _temp = _mod_up.forward(outputs[res_idx]) + prediction = self.stem_finals(_temp) + return prediction + + +class TopologyConstruction(nn.Module): + """ + The base class for `TopologyInstance` and `TopologySearch`. + + Args: + arch_code: `[arch_code_a, arch_code_c]`, numpy arrays. The architecture codes defining the model. + For example, for a ``num_depths=4, num_blocks=12`` search space: + + - `arch_code_a` is a 12x10 (10 paths) binary matrix representing if a path is activated. + - `arch_code_c` is a 12x10x5 (5 operations) binary matrix representing if a cell operation is used. + - `arch_code` in ``__init__()`` is used for creating the network and remove unused network blocks. If None, + + all paths and cells operations will be used, and must be in the searching stage (is_search=True). + channel_mul: adjust intermediate channel number, default is 1. + cell: operation of each node. + num_blocks: number of blocks (depth in the horizontal direction) of the DiNTS search space. + num_depths: number of image resolutions of the DiNTS search space: 1, 1/2, 1/4 ... in each dimension. + use_downsample: use downsample in the stem. If False, the search space will be in resolution [1, 1/2, 1/4, 1/8], + if True, the search space will be in resolution [1/2, 1/4, 1/8, 1/16]. + device: `'cpu'`, `'cuda'`, or device ID. + + + Predefined variables: + `filter_nums`: default to 32. Double the number of channels after downsample. + topology related variables: + + - `arch_code2in`: path activation to its incoming node index (resolution). For depth = 4, + arch_code2in = [0, 1, 0, 1, 2, 1, 2, 3, 2, 3]. The first path outputs from node 0 (top resolution), + the second path outputs from node 1 (second resolution in the search space), + the third path outputs from node 0, etc. + - `arch_code2ops`: path activation to operations of upsample 1, keep 0, downsample -1. For depth = 4, + arch_code2ops = [0, 1, -1, 0, 1, -1, 0, 1, -1, 0]. The first path does not change + resolution, the second path perform upsample, the third perform downsample, etc. + - `arch_code2out`: path activation to its output node index. + For depth = 4, arch_code2out = [0, 0, 1, 1, 1, 2, 2, 2, 3, 3], + the first and second paths connects to node 0 (top resolution), the 3,4,5 paths connects to node 1, etc. 
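    Example (a sketch of the ``arch_code`` format consumed by ``__init__`` below, for a
    ``num_blocks=12, num_depths=4`` 3D search space; the values are illustrative)::

        import numpy as np

        num_blocks, num_paths, num_cell_ops = 12, 10, 5  # 10 paths = 3 * num_depths - 2
        arch_code_a = np.ones((num_blocks, num_paths), dtype=np.int64)               # binary path activations
        arch_code_c = np.random.randint(num_cell_ops, size=(num_blocks, num_paths))  # per-path operation indices
        arch_code = [arch_code_a, arch_code_c]  # arch_code_c is one-hot encoded internally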
+ """ + + def __init__( + self, + arch_code: Optional[list] = None, + channel_mul: float = 1.0, + cell=Cell, + num_blocks: int = 6, + num_depths: int = 3, + spatial_dims: int = 3, + act_name: Union[Tuple, str] = "RELU", + norm_name: Union[Tuple, str] = ("INSTANCE", {"affine": True}), + use_downsample: bool = True, + device: str = "cpu", + ): + + super().__init__() + + self.filter_nums = [int(n_feat * channel_mul) for n_feat in (32, 64, 128, 256, 512)] + self.num_blocks = num_blocks + self.num_depths = num_depths + self._spatial_dims = spatial_dims + self._act_name = act_name + self._norm_name = norm_name + self.use_downsample = use_downsample + self.device = device + self.num_cell_ops = 0 + if self._spatial_dims == 2: + self.num_cell_ops = len(cell.OPS2D) + elif self._spatial_dims == 3: + self.num_cell_ops = len(cell.OPS3D) + + # Calculate predefined parameters for topology search and decoding + arch_code2in, arch_code2out = [], [] + for i in range(Cell.DIRECTIONS * self.num_depths - 2): + arch_code2in.append((i + 1) // Cell.DIRECTIONS - 1 + (i + 1) % Cell.DIRECTIONS) + arch_code2ops = ([-1, 0, 1] * self.num_depths)[1:-1] + for m in range(self.num_depths): + arch_code2out.extend([m, m, m]) + arch_code2out = arch_code2out[1:-1] + self.arch_code2in = arch_code2in + self.arch_code2ops = arch_code2ops + self.arch_code2out = arch_code2out + + # define NAS search space + if arch_code is None: + arch_code_a = torch.ones((self.num_blocks, len(self.arch_code2out))).to(self.device) + arch_code_c = torch.ones((self.num_blocks, len(self.arch_code2out), self.num_cell_ops)).to(self.device) + else: + arch_code_a = torch.from_numpy(arch_code[0]).to(self.device) + arch_code_c = F.one_hot(torch.from_numpy(arch_code[1]).to(torch.int64), self.num_cell_ops).to(self.device) + + self.arch_code_a = arch_code_a + self.arch_code_c = arch_code_c + # define cell operation on each path + self.cell_tree = nn.ModuleDict() + for blk_idx in range(self.num_blocks): + for res_idx in range(len(self.arch_code2out)): + if self.arch_code_a[blk_idx, res_idx] == 1: + self.cell_tree[str((blk_idx, res_idx))] = cell( + self.filter_nums[self.arch_code2in[res_idx] + int(use_downsample)], + self.filter_nums[self.arch_code2out[res_idx] + int(use_downsample)], + self.arch_code2ops[res_idx], + self.arch_code_c[blk_idx, res_idx], + self._spatial_dims, + self._act_name, + self._norm_name, + ) + + def forward(self, x): + """This function to be implemented by the architecture instances or search spaces.""" + pass + + +class TopologyInstance(TopologyConstruction): + """ + Instance of the final searched architecture. Only used in re-training/inference stage. + """ + + def __init__( + self, + arch_code=None, + channel_mul: float = 1.0, + cell=Cell, + num_blocks: int = 6, + num_depths: int = 3, + spatial_dims: int = 3, + act_name: Union[Tuple, str] = "RELU", + norm_name: Union[Tuple, str] = ("INSTANCE", {"affine": True}), + use_downsample: bool = True, + device: str = "cpu", + ): + """ + Initialize DiNTS topology search space of neural architectures. + """ + if arch_code is None: + warnings.warn("arch_code not provided when not searching.") + + super().__init__( + arch_code=arch_code, + channel_mul=channel_mul, + cell=cell, + num_blocks=num_blocks, + num_depths=num_depths, + spatial_dims=spatial_dims, + act_name=act_name, + norm_name=norm_name, + use_downsample=use_downsample, + device=device, + ) + + def forward(self, x: List[torch.Tensor]) -> List[torch.Tensor]: + """ + Args: + x: input tensor. 
+ """ + # generate path activation probability + inputs, outputs = x, [torch.tensor(0.0).to(x[0])] * self.num_depths + for blk_idx in range(self.num_blocks): + outputs = [torch.tensor(0.0).to(x[0])] * self.num_depths + for res_idx, activation in enumerate(self.arch_code_a[blk_idx].data): + if activation: + mod: CellInterface = self.cell_tree[str((blk_idx, res_idx))] + _out = mod.forward( + x=inputs[self.arch_code2in[res_idx]], weight=torch.ones_like(self.arch_code_c[blk_idx, res_idx]) + ) + outputs[self.arch_code2out[res_idx]] = outputs[self.arch_code2out[res_idx]] + _out + inputs = outputs + + return inputs + + +class TopologySearch(TopologyConstruction): + """ + DiNTS topology search space of neural architectures. + + Examples: + + .. code-block:: python + + from monai.networks.nets.dints import TopologySearch + + topology_search_space = TopologySearch( + channel_mul=0.5, num_blocks=8, num_depths=4, use_downsample=True, spatial_dims=3) + topology_search_space.get_ram_cost_usage(in_size=(2, 16, 80, 80, 80), full=True) + multi_res_images = [ + torch.randn(2, 16, 80, 80, 80), + torch.randn(2, 32, 40, 40, 40), + torch.randn(2, 64, 20, 20, 20), + torch.randn(2, 128, 10, 10, 10)] + prediction = topology_search_space(image) + for x in prediction: print(x.shape) + # torch.Size([2, 16, 80, 80, 80]) + # torch.Size([2, 32, 40, 40, 40]) + # torch.Size([2, 64, 20, 20, 20]) + # torch.Size([2, 128, 10, 10, 10]) + + Class method overview: + + - ``get_prob_a()``: convert learnable architecture weights to path activation probabilities. + - ``get_ram_cost_usage()``: get estimated ram cost. + - ``get_topology_entropy()``: get topology entropy loss in searching stage. + - ``decode()``: get final binarized architecture code. + - ``gen_mtx()``: generate variables needed for topology search. + + Predefined variables: + - `tidx`: index used to convert path activation matrix T = (depth,depth) in transfer_mtx to + path activation arch_code (1,3*depth-2), for depth = 4, tidx = [0, 1, 4, 5, 6, 9, 10, 11, 14, 15], + A tidx (10 binary values) represents the path activation. + - `transfer_mtx`: feasible path activation matrix (denoted as T) given a node activation pattern. + It is used to convert path activation pattern (1, paths) to node activation (1, nodes) + - `node_act_list`: all node activation [2^num_depths-1, depth]. For depth = 4, there are 15 node activation + patterns, each of length 4. For example, [1,1,0,0] means nodes 0, 1 are activated (with input paths). + - `all_connect`: All possible path activations. For depth = 4, + all_connection has 1024 vectors of length 10 (10 paths). + The return value will exclude path activation of all 0. + """ + + def __init__( + self, + channel_mul: float = 1.0, + cell=Cell, + arch_code: Optional[list] = None, + num_blocks: int = 6, + num_depths: int = 3, + spatial_dims: int = 3, + act_name: Union[Tuple, str] = "RELU", + norm_name: Union[Tuple, str] = ("INSTANCE", {"affine": True}), + use_downsample: bool = True, + device: str = "cpu", + ): + """ + Initialize DiNTS topology search space of neural architectures. 
+ """ + super().__init__( + arch_code=arch_code, + channel_mul=channel_mul, + cell=cell, + num_blocks=num_blocks, + num_depths=num_depths, + spatial_dims=spatial_dims, + act_name=act_name, + norm_name=norm_name, + use_downsample=use_downsample, + device=device, + ) + + tidx = [] + _d = Cell.DIRECTIONS + for i in range(_d * self.num_depths - 2): + tidx.append((i + 1) // _d * self.num_depths + (i + 1) // _d - 1 + (i + 1) % _d) + self.tidx = tidx + transfer_mtx, node_act_list, child_list = self.gen_mtx(num_depths) + + self.node_act_list = np.asarray(node_act_list) + self.node_act_dict = {str(self.node_act_list[i]): i for i in range(len(self.node_act_list))} + self.transfer_mtx = transfer_mtx + self.child_list = np.asarray(child_list) + + self.ram_cost = np.zeros((self.num_blocks, len(self.arch_code2out), self.num_cell_ops)) + for blk_idx in range(self.num_blocks): + for res_idx in range(len(self.arch_code2out)): + if self.arch_code_a[blk_idx, res_idx] == 1: + self.ram_cost[blk_idx, res_idx] = np.array( + [ + op.ram_cost + self.cell_tree[str((blk_idx, res_idx))].preprocess.ram_cost + for op in self.cell_tree[str((blk_idx, res_idx))].op.ops[: self.num_cell_ops] + ] + ) + + # define cell and macro architecture probabilities + self.log_alpha_c = nn.Parameter( + torch.zeros(self.num_blocks, len(self.arch_code2out), self.num_cell_ops) + .normal_(1, 0.01) + .to(self.device) + .requires_grad_() + ) + self.log_alpha_a = nn.Parameter( + torch.zeros(self.num_blocks, len(self.arch_code2out)).normal_(0, 0.01).to(self.device).requires_grad_() + ) + self._arch_param_names = ["log_alpha_a", "log_alpha_c"] + + def gen_mtx(self, depth: int): + """ + Generate elements needed in decoding and topology. + + - `transfer_mtx`: feasible path activation matrix (denoted as T) given a node activation pattern. + It is used to convert path activation pattern (1, paths) to node activation (1, nodes) + - `node_act_list`: all node activation [2^num_depths-1, depth]. For depth = 4, there are 15 node activation + patterns, each of length 4. For example, [1,1,0,0] means nodes 0, 1 are activated (with input paths). + - `all_connect`: All possible path activations. For depth = 4, + all_connection has 1024 vectors of length 10 (10 paths). + The return value will exclude path activation of all 0. + """ + # total paths in a block, each node has three output paths, + # except the two nodes at the top and the bottom scales + paths = Cell.DIRECTIONS * depth - 2 + + # for 10 paths, all_connect has 1024 possible path activations. [1 0 0 0 0 0 0 0 0 0] means the top + # path is activated. 
+ all_connect = _dfs(0, paths - 1) + + # Save all possible connections in mtx (might be redundant and infeasible) + mtx = [] + for m in all_connect: + # convert path activation [1,paths] to path activation matrix [depth, depth] + ma = np.zeros((depth, depth)) + for i in range(paths): + ma[(i + 1) // Cell.DIRECTIONS, (i + 1) // Cell.DIRECTIONS - 1 + (i + 1) % Cell.DIRECTIONS] = m[i] + mtx.append(ma) + + # define all possible node activation + node_act_list = _dfs(0, depth - 1)[1:] + transfer_mtx = {} + for arch_code in node_act_list: + # make sure each activated node has an active connection, inactivated node has no connection + arch_code_mtx = [_ for _ in mtx if ((np.sum(_, 0) > 0).astype(int) == np.array(arch_code)).all()] + transfer_mtx[str(np.array(arch_code))] = arch_code_mtx + + return transfer_mtx, node_act_list, all_connect[1:] + + def weight_parameters(self): + return [param for name, param in self.named_parameters() if name not in self._arch_param_names] + + def get_prob_a(self, child: bool = False): + """ + Get final path and child model probabilities from architecture weights `log_alpha_a`. + This is used in forward pass, getting training loss, and final decoding. + + Args: + child: return child probability (used in decoding) + Return: + arch_code_prob_a: the path activation probability of size: + `[number of blocks, number of paths in each block]`. + For 12 blocks, 4 depths search space, the size is [12,10] + probs_a: The probability of all child models (size 1023x10). Each child model is a path activation pattern + (1D vector of length 10 for 10 paths). In total 1023 child models (2^10 -1) + """ + _arch_code_prob_a = torch.sigmoid(self.log_alpha_a) + # remove the case where all path are zero, and re-normalize. + norm = 1 - (1 - _arch_code_prob_a).prod(-1) + arch_code_prob_a = _arch_code_prob_a / norm.unsqueeze(1) + if child: + path_activation = torch.from_numpy(self.child_list).to(self.device) + probs_a = [ + ( + path_activation * _arch_code_prob_a[blk_idx] + + (1 - path_activation) * (1 - _arch_code_prob_a[blk_idx]) + ).prod(-1) + / norm[blk_idx] + for blk_idx in range(self.num_blocks) + ] + probs_a = torch.stack(probs_a) # type: ignore + return probs_a, arch_code_prob_a + return None, arch_code_prob_a + + def get_ram_cost_usage(self, in_size, full: bool = False): + """ + Get estimated output tensor size to approximate RAM consumption. + + Args: + in_size: input image shape (4D/5D, ``[BCHW[D]]``) at the highest resolution level. + full: full ram cost usage with all probability of 1. 
+ """ + # convert input image size to feature map size at each level + batch_size = in_size[0] + image_size = np.array(in_size[-self._spatial_dims :]) + sizes = [] + for res_idx in range(self.num_depths): + sizes.append(batch_size * self.filter_nums[res_idx] * (image_size // (2**res_idx)).prod()) + sizes = torch.tensor(sizes).to(torch.float32).to(self.device) / (2 ** (int(self.use_downsample))) + probs_a, arch_code_prob_a = self.get_prob_a(child=False) + cell_prob = F.softmax(self.log_alpha_c, dim=-1) + if full: + arch_code_prob_a = arch_code_prob_a.detach() + arch_code_prob_a.fill_(1) + ram_cost = torch.from_numpy(self.ram_cost).to(torch.float32).to(self.device) + usage = 0.0 + for blk_idx in range(self.num_blocks): + # node activation for input + # cell operation + for path_idx in range(len(self.arch_code2out)): + usage += ( + arch_code_prob_a[blk_idx, path_idx] + * (1 + (ram_cost[blk_idx, path_idx] * cell_prob[blk_idx, path_idx]).sum()) + * sizes[self.arch_code2out[path_idx]] + ) + return usage * 32 / 8 / 1024**2 + + def get_topology_entropy(self, probs): + """ + Get topology entropy loss at searching stage. + + Args: + probs: path activation probabilities + """ + if hasattr(self, "node2in"): + node2in = self.node2in # pylint: disable=E0203 + node2out = self.node2out # pylint: disable=E0203 + else: + # node activation index to feasible input child_idx + node2in = [[] for _ in range(len(self.node_act_list))] + # node activation index to feasible output child_idx + node2out = [[] for _ in range(len(self.node_act_list))] + for child_idx in range(len(self.child_list)): + _node_in, _node_out = np.zeros(self.num_depths), np.zeros(self.num_depths) + for res_idx in range(len(self.arch_code2out)): + _node_out[self.arch_code2out[res_idx]] += self.child_list[child_idx][res_idx] + _node_in[self.arch_code2in[res_idx]] += self.child_list[child_idx][res_idx] + _node_in = (_node_in >= 1).astype(int) + _node_out = (_node_out >= 1).astype(int) + node2in[self.node_act_dict[str(_node_out)]].append(child_idx) + node2out[self.node_act_dict[str(_node_in)]].append(child_idx) + self.node2in = node2in + self.node2out = node2out + # calculate entropy + ent = 0 + for blk_idx in range(self.num_blocks - 1): + blk_ent = 0 + # node activation probability + for node_idx in range(len(self.node_act_list)): + _node_p = probs[blk_idx, node2in[node_idx]].sum() + _out_probs = probs[blk_idx + 1, node2out[node_idx]].sum() + blk_ent += -(_node_p * torch.log(_out_probs + 1e-5) + (1 - _node_p) * torch.log(1 - _out_probs + 1e-5)) + ent += blk_ent + return ent + + def decode(self): + """ + Decode network log_alpha_a/log_alpha_c using dijkstra shortest path algorithm. + + `[node_a, arch_code_a, arch_code_c, arch_code_a_max]` is decoded when using ``self.decode()``. + + For example, for a ``num_depths=4``, ``num_blocks=12`` search space: + + - ``node_a`` is a 4x13 binary matrix representing if a feature node is activated + (13 because of multi-resolution inputs). + - ``arch_code_a`` is a 12x10 (10 paths) binary matrix representing if a path is activated. + - ``arch_code_c`` is a 12x10x5 (5 operations) binary matrix representing if a cell operation is used. 
+ + Return: + arch_code with maximum probability + """ + probs, arch_code_prob_a = self.get_prob_a(child=True) + arch_code_a_max = self.child_list[torch.argmax(probs, -1).data.cpu().numpy()] + arch_code_c = torch.argmax(F.softmax(self.log_alpha_c, -1), -1).data.cpu().numpy() + probs = probs.data.cpu().numpy() + + # define adjacency matrix + amtx = np.zeros( + (1 + len(self.child_list) * self.num_blocks + 1, 1 + len(self.child_list) * self.num_blocks + 1) + ) + + # build a path activation to child index searching dictionary + path2child = {str(self.child_list[i]): i for i in range(len(self.child_list))} + + # build a submodel to submodel index + sub_amtx = np.zeros((len(self.child_list), len(self.child_list))) + for child_idx in range(len(self.child_list)): + _node_act = np.zeros(self.num_depths).astype(int) + for path_idx in range(len(self.child_list[child_idx])): + _node_act[self.arch_code2out[path_idx]] += self.child_list[child_idx][path_idx] + _node_act = (_node_act >= 1).astype(int) + for mtx in self.transfer_mtx[str(_node_act)]: + connect_child_idx = path2child[str(mtx.flatten()[self.tidx].astype(int))] + sub_amtx[child_idx, connect_child_idx] = 1 + + # fill in source to first block, add 1e-5/1e-3 to avoid log0 and negative edge weights + amtx[0, 1 : 1 + len(self.child_list)] = -np.log(probs[0] + 1e-5) + 0.001 + + # fill in the rest blocks + for blk_idx in range(1, self.num_blocks): + amtx[ + 1 + (blk_idx - 1) * len(self.child_list) : 1 + blk_idx * len(self.child_list), + 1 + blk_idx * len(self.child_list) : 1 + (blk_idx + 1) * len(self.child_list), + ] = sub_amtx * np.tile(-np.log(probs[blk_idx] + 1e-5) + 0.001, (len(self.child_list), 1)) + + # fill in the last to the sink + amtx[1 + (self.num_blocks - 1) * len(self.child_list) : 1 + self.num_blocks * len(self.child_list), -1] = 0.001 + + graph = csr_matrix(amtx) + dist_matrix, predecessors, sources = dijkstra( + csgraph=graph, directed=True, indices=0, min_only=True, return_predecessors=True + ) + index, a_idx = -1, -1 + arch_code_a = np.zeros((self.num_blocks, len(self.arch_code2out))) + node_a = np.zeros((self.num_blocks + 1, self.num_depths)) + + # decoding to paths + while True: + index = predecessors[index] + if index == 0: + break + child_idx = (index - 1) % len(self.child_list) + arch_code_a[a_idx, :] = self.child_list[child_idx] + for res_idx in range(len(self.arch_code2out)): + node_a[a_idx, self.arch_code2out[res_idx]] += arch_code_a[a_idx, res_idx] + a_idx -= 1 + for res_idx in range(len(self.arch_code2out)): + node_a[a_idx, self.arch_code2in[res_idx]] += arch_code_a[0, res_idx] + node_a = (node_a >= 1).astype(int) + return node_a, arch_code_a, arch_code_c, arch_code_a_max + + def forward(self, x): + """ + Prediction based on dynamic arch_code. + + Args: + x: a list of `num_depths` input tensors as a multi-resolution input. + tensor is of shape `BCHW[D]` where `C` must match `self.filter_nums`. 
+ """ + # generate path activation probability + probs_a, arch_code_prob_a = self.get_prob_a(child=False) + inputs = x + for blk_idx in range(self.num_blocks): + outputs = [0.0] * self.num_depths + for res_idx, activation in enumerate(self.arch_code_a[blk_idx].data.cpu().numpy()): + if activation: + _w = F.softmax(self.log_alpha_c[blk_idx, res_idx], dim=-1) + outputs[self.arch_code2out[res_idx]] += ( + self.cell_tree[str((blk_idx, res_idx))](inputs[self.arch_code2in[res_idx]], weight=_w) + * arch_code_prob_a[blk_idx, res_idx] + ) + inputs = outputs + + return inputs diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/dynunet.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/dynunet.py new file mode 100644 index 0000000000000000000000000000000000000000..053ab255b846ecbedb437a96932a24a83e4654c3 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/dynunet.py @@ -0,0 +1,373 @@ +# Copyright (c) MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from typing import List, Optional, Sequence, Tuple, Union + +import torch +import torch.nn as nn +from torch.nn.functional import interpolate + +from monai.networks.blocks.dynunet_block import UnetBasicBlock, UnetOutBlock, UnetResBlock, UnetUpBlock + +__all__ = ["DynUNet", "DynUnet", "Dynunet"] + + +class DynUNetSkipLayer(nn.Module): + """ + Defines a layer in the UNet topology which combines the downsample and upsample pathways with the skip connection. + The member `next_layer` may refer to instances of this class or the final bottleneck layer at the bottom the UNet + structure. The purpose of using a recursive class like this is to get around the Torchscript restrictions on + looping over lists of layers and accumulating lists of output tensors which must be indexed. The `heads` list is + shared amongst all the instances of this class and is used to store the output from the supervision heads during + forward passes of the network. + """ + + heads: Optional[List[torch.Tensor]] + + def __init__(self, index, downsample, upsample, next_layer, heads=None, super_head=None): + super().__init__() + self.downsample = downsample + self.next_layer = next_layer + self.upsample = upsample + self.super_head = super_head + self.heads = heads + self.index = index + + def forward(self, x): + downout = self.downsample(x) + nextout = self.next_layer(downout) + upout = self.upsample(nextout, downout) + if self.super_head is not None and self.heads is not None and self.index > 0: + self.heads[self.index - 1] = self.super_head(upout) + + return upout + + +class DynUNet(nn.Module): + """ + This reimplementation of a dynamic UNet (DynUNet) is based on: + `Automated Design of Deep Learning Methods for Biomedical Image Segmentation `_. + `nnU-Net: Self-adapting Framework for U-Net-Based Medical Image Segmentation `_. + `Optimized U-Net for Brain Tumor Segmentation `_. 
+ + This model is more flexible compared with ``monai.networks.nets.UNet`` in three + places: + + - Residual connection is supported in conv blocks. + - Anisotropic kernel sizes and strides can be used in each layers. + - Deep supervision heads can be added. + + The model supports 2D or 3D inputs and is consisted with four kinds of blocks: + one input block, `n` downsample blocks, one bottleneck and `n+1` upsample blocks. Where, `n>0`. + The first and last kernel and stride values of the input sequences are used for input block and + bottleneck respectively, and the rest value(s) are used for downsample and upsample blocks. + Therefore, pleasure ensure that the length of input sequences (``kernel_size`` and ``strides``) + is no less than 3 in order to have at least one downsample and upsample blocks. + + To meet the requirements of the structure, the input size for each spatial dimension should be divisible + by the product of all strides in the corresponding dimension. In addition, the minimal spatial size should have + at least one dimension that has twice the size of the product of all strides. + For example, if `strides=((1, 2, 4), 2, 2, 1)`, the spatial size should be divisible by `(4, 8, 16)`, + and the minimal spatial size is `(8, 8, 16)` or `(4, 16, 16)` or `(4, 8, 32)`. + + The output size for each spatial dimension equals to the input size of the corresponding dimension divided by the + stride in strides[0]. + For example, if `strides=((1, 2, 4), 2, 2, 1)` and the input size is `(64, 32, 32)`, the output size is `(64, 16, 8)`. + + For backwards compatibility with old weights, please set `strict=False` when calling `load_state_dict`. + + Usage example with medical segmentation decathlon dataset is available at: + https://github.com/Project-MONAI/tutorials/tree/master/modules/dynunet_pipeline. + + Args: + spatial_dims: number of spatial dimensions. + in_channels: number of input channels. + out_channels: number of output channels. + kernel_size: convolution kernel size. + strides: convolution strides for each blocks. + upsample_kernel_size: convolution kernel size for transposed convolution layers. The values should + equal to strides[1:]. + filters: number of output channels for each blocks. Different from nnU-Net, in this implementation we add + this argument to make the network more flexible. As shown in the third reference, one way to determine + this argument is like: + ``[64, 96, 128, 192, 256, 384, 512, 768, 1024][: len(strides)]``. + The above way is used in the network that wins task 1 in the BraTS21 Challenge. + If not specified, the way which nnUNet used will be employed. Defaults to ``None``. + dropout: dropout ratio. Defaults to no dropout. + norm_name: feature normalization type and arguments. Defaults to ``INSTANCE``. + `INSTANCE_NVFUSER` is a faster version of the instance norm layer, it can be used when: + 1) `spatial_dims=3`, 2) CUDA device is available, 3) `apex` is installed and 4) non-Windows OS is used. + act_name: activation layer type and arguments. Defaults to ``leakyrelu``. + deep_supervision: whether to add deep supervision head before output. Defaults to ``False``. + If ``True``, in training mode, the forward function will output not only the final feature map + (from `output_block`), but also the feature maps that come from the intermediate up sample layers. 
+ In order to unify the return type (a restriction of TorchScript), all intermediate + feature maps are interpolated into the same size as the final feature map and stacked together + (with a new dimension in the first axis) into one single tensor. + For instance, if there are two intermediate feature maps with shapes: (1, 2, 16, 12) and + (1, 2, 8, 6), and the final feature map has the shape (1, 2, 32, 24), then all intermediate feature maps + will be interpolated into (1, 2, 32, 24), and the stacked tensor will have the shape (1, 3, 2, 32, 24). + When calculating the loss, you can use torch.unbind to get all feature maps and compute the loss + one by one with the ground truth, then do a weighted average for all losses to achieve the final loss. + deep_supr_num: number of feature maps that will be output by the deep supervision heads. The + value should be larger than 0 and less than the number of up sample layers. + Defaults to 1. + res_block: whether to use residual connection based convolution blocks in the network. + Defaults to ``False``. + trans_bias: whether to set the bias parameter in transposed convolution layers. Defaults to ``False``. + """ + + def __init__( + self, + spatial_dims: int, + in_channels: int, + out_channels: int, + kernel_size: Sequence[Union[Sequence[int], int]], + strides: Sequence[Union[Sequence[int], int]], + upsample_kernel_size: Sequence[Union[Sequence[int], int]], + filters: Optional[Sequence[int]] = None, + dropout: Optional[Union[Tuple, str, float]] = None, + norm_name: Union[Tuple, str] = ("INSTANCE", {"affine": True}), + act_name: Union[Tuple, str] = ("leakyrelu", {"inplace": True, "negative_slope": 0.01}), + deep_supervision: bool = False, + deep_supr_num: int = 1, + res_block: bool = False, + trans_bias: bool = False, + ): + super().__init__() + self.spatial_dims = spatial_dims + self.in_channels = in_channels + self.out_channels = out_channels + self.kernel_size = kernel_size + self.strides = strides + self.upsample_kernel_size = upsample_kernel_size + self.norm_name = norm_name + self.act_name = act_name + self.dropout = dropout + self.conv_block = UnetResBlock if res_block else UnetBasicBlock + self.trans_bias = trans_bias + if filters is not None: + self.filters = filters + self.check_filters() + else: + self.filters = [min(2 ** (5 + i), 320 if spatial_dims == 3 else 512) for i in range(len(strides))] + self.input_block = self.get_input_block() + self.downsamples = self.get_downsamples() + self.bottleneck = self.get_bottleneck() + self.upsamples = self.get_upsamples() + self.output_block = self.get_output_block(0) + self.deep_supervision = deep_supervision + self.deep_supr_num = deep_supr_num + # initialize the typed list of supervision head outputs so that Torchscript can recognize what's going on + self.heads: List[torch.Tensor] = [torch.rand(1)] * self.deep_supr_num + if self.deep_supervision: + self.deep_supervision_heads = self.get_deep_supervision_heads() + self.check_deep_supr_num() + + self.apply(self.initialize_weights) + self.check_kernel_stride() + + def create_skips(index, downsamples, upsamples, bottleneck, superheads=None): + """ + Construct the UNet topology as a sequence of skip layers terminating with the bottleneck layer. This is + done recursively from the top down since a recursive nn.Module subclass is being used to be compatible + with Torchscript.
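A minimal usage sketch (not part of the vendored dynunet.py) of the stride constraints and the stacked deep-supervision output described in the docstring above; the loss function and the supervision weights below are arbitrary assumptions.

import torch
from monai.networks.nets import DynUNet
from monai.losses import DiceCELoss

# strides (1, 2, 2, 2): each spatial dimension must be divisible by 1*2*2*2 = 8,
# and at least one dimension must be >= 16; 32 satisfies both.
net = DynUNet(
    spatial_dims=3,
    in_channels=1,
    out_channels=2,
    kernel_size=[3, 3, 3, 3],
    strides=[1, 2, 2, 2],
    upsample_kernel_size=[2, 2, 2],  # must equal strides[1:]
    deep_supervision=True,
    deep_supr_num=2,                 # must be < number of upsample layers (3 here)
)
net.train()
image = torch.rand(1, 1, 32, 32, 32)
label = torch.randint(0, 2, (1, 1, 32, 32, 32)).float()

out = net(image)                     # shape (1, 1 + deep_supr_num, 2, 32, 32, 32)
loss_fn = DiceCELoss(to_onehot_y=True, softmax=True)
weights = [1.0, 0.5, 0.25]           # illustrative supervision weights
maps = torch.unbind(out, dim=1)      # final feature map first, then the supervision heads
loss = sum(w * loss_fn(m, label) for w, m in zip(weights, maps))
loss.backward()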
Initially the length of `downsamples` will be one more than that of `superheads` + since the `input_block` is passed to this function as the first item in `downsamples`, however this + shouldn't be associated with a supervision head. + """ + + if len(downsamples) != len(upsamples): + raise ValueError(f"{len(downsamples)} != {len(upsamples)}") + + if len(downsamples) == 0: # bottom of the network, pass the bottleneck block + return bottleneck + + if superheads is None: + next_layer = create_skips(1 + index, downsamples[1:], upsamples[1:], bottleneck) + return DynUNetSkipLayer(index, downsample=downsamples[0], upsample=upsamples[0], next_layer=next_layer) + + super_head_flag = False + if index == 0: # don't associate a supervision head with self.input_block + rest_heads = superheads + else: + if len(superheads) > 0: + super_head_flag = True + rest_heads = superheads[1:] + else: + rest_heads = nn.ModuleList() + + # create the next layer down, this will stop at the bottleneck layer + next_layer = create_skips(1 + index, downsamples[1:], upsamples[1:], bottleneck, superheads=rest_heads) + if super_head_flag: + return DynUNetSkipLayer( + index, + downsample=downsamples[0], + upsample=upsamples[0], + next_layer=next_layer, + heads=self.heads, + super_head=superheads[0], + ) + + return DynUNetSkipLayer(index, downsample=downsamples[0], upsample=upsamples[0], next_layer=next_layer) + + if not self.deep_supervision: + self.skip_layers = create_skips( + 0, [self.input_block] + list(self.downsamples), self.upsamples[::-1], self.bottleneck + ) + else: + self.skip_layers = create_skips( + 0, + [self.input_block] + list(self.downsamples), + self.upsamples[::-1], + self.bottleneck, + superheads=self.deep_supervision_heads, + ) + + def check_kernel_stride(self): + kernels, strides = self.kernel_size, self.strides + error_msg = "length of kernel_size and strides should be the same, and no less than 3." + if len(kernels) != len(strides) or len(kernels) < 3: + raise ValueError(error_msg) + + for idx, k_i in enumerate(kernels): + kernel, stride = k_i, strides[idx] + if not isinstance(kernel, int): + error_msg = f"length of kernel_size in block {idx} should be the same as spatial_dims." + if len(kernel) != self.spatial_dims: + raise ValueError(error_msg) + if not isinstance(stride, int): + error_msg = f"length of stride in block {idx} should be the same as spatial_dims." 
+ if len(stride) != self.spatial_dims: + raise ValueError(error_msg) + + def check_deep_supr_num(self): + deep_supr_num, strides = self.deep_supr_num, self.strides + num_up_layers = len(strides) - 1 + if deep_supr_num >= num_up_layers: + raise ValueError("deep_supr_num should be less than the number of up sample layers.") + if deep_supr_num < 1: + raise ValueError("deep_supr_num should be larger than 0.") + + def check_filters(self): + filters = self.filters + if len(filters) < len(self.strides): + raise ValueError("length of filters should be no less than the length of strides.") + else: + self.filters = filters[: len(self.strides)] + + def forward(self, x): + out = self.skip_layers(x) + out = self.output_block(out) + if self.training and self.deep_supervision: + out_all = [out] + for feature_map in self.heads: + out_all.append(interpolate(feature_map, out.shape[2:])) + return torch.stack(out_all, dim=1) + return out + + def get_input_block(self): + return self.conv_block( + self.spatial_dims, + self.in_channels, + self.filters[0], + self.kernel_size[0], + self.strides[0], + self.norm_name, + self.act_name, + dropout=self.dropout, + ) + + def get_bottleneck(self): + return self.conv_block( + self.spatial_dims, + self.filters[-2], + self.filters[-1], + self.kernel_size[-1], + self.strides[-1], + self.norm_name, + self.act_name, + dropout=self.dropout, + ) + + def get_output_block(self, idx: int): + return UnetOutBlock(self.spatial_dims, self.filters[idx], self.out_channels, dropout=self.dropout) + + def get_downsamples(self): + inp, out = self.filters[:-2], self.filters[1:-1] + strides, kernel_size = self.strides[1:-1], self.kernel_size[1:-1] + return self.get_module_list(inp, out, kernel_size, strides, self.conv_block) + + def get_upsamples(self): + inp, out = self.filters[1:][::-1], self.filters[:-1][::-1] + strides, kernel_size = self.strides[1:][::-1], self.kernel_size[1:][::-1] + upsample_kernel_size = self.upsample_kernel_size[::-1] + return self.get_module_list( + inp, out, kernel_size, strides, UnetUpBlock, upsample_kernel_size, trans_bias=self.trans_bias + ) + + def get_module_list( + self, + in_channels: List[int], + out_channels: List[int], + kernel_size: Sequence[Union[Sequence[int], int]], + strides: Sequence[Union[Sequence[int], int]], + conv_block: nn.Module, + upsample_kernel_size: Optional[Sequence[Union[Sequence[int], int]]] = None, + trans_bias: bool = False, + ): + layers = [] + if upsample_kernel_size is not None: + for in_c, out_c, kernel, stride, up_kernel in zip( + in_channels, out_channels, kernel_size, strides, upsample_kernel_size + ): + params = { + "spatial_dims": self.spatial_dims, + "in_channels": in_c, + "out_channels": out_c, + "kernel_size": kernel, + "stride": stride, + "norm_name": self.norm_name, + "act_name": self.act_name, + "dropout": self.dropout, + "upsample_kernel_size": up_kernel, + "trans_bias": trans_bias, + } + layer = conv_block(**params) + layers.append(layer) + else: + for in_c, out_c, kernel, stride in zip(in_channels, out_channels, kernel_size, strides): + params = { + "spatial_dims": self.spatial_dims, + "in_channels": in_c, + "out_channels": out_c, + "kernel_size": kernel, + "stride": stride, + "norm_name": self.norm_name, + "act_name": self.act_name, + "dropout": self.dropout, + } + layer = conv_block(**params) + layers.append(layer) + return nn.ModuleList(layers) + + def get_deep_supervision_heads(self): + return nn.ModuleList([self.get_output_block(i + 1) for i in range(self.deep_supr_num)]) + + @staticmethod + def 
initialize_weights(module): + if isinstance(module, (nn.Conv3d, nn.Conv2d, nn.ConvTranspose3d, nn.ConvTranspose2d)): + module.weight = nn.init.kaiming_normal_(module.weight, a=0.01) + if module.bias is not None: + module.bias = nn.init.constant_(module.bias, 0) + + +DynUnet = Dynunet = DynUNet diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/fullyconnectednet.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/fullyconnectednet.py new file mode 100644 index 0000000000000000000000000000000000000000..810c07431bc9f7ce718d31bc9ce5cfc2b1aedf8f --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/fullyconnectednet.py @@ -0,0 +1,185 @@ +# Copyright (c) MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Optional, Sequence, Tuple, Union + +import torch +import torch.nn as nn + +from monai.networks.blocks import ADN +from monai.networks.layers.factories import Act + +__all__ = ["FullyConnectedNet", "VarFullyConnectedNet"] + + +def _get_adn_layer( + act: Optional[Union[Tuple, str]], dropout: Optional[Union[Tuple, str, float]], ordering: Optional[str] +) -> ADN: + if ordering: + return ADN(act=act, dropout=dropout, dropout_dim=1, ordering=ordering) + return ADN(act=act, dropout=dropout, dropout_dim=1) + + +class FullyConnectedNet(nn.Sequential): + """ + Simple full-connected layer neural network composed of a sequence of linear layers with PReLU activation and + dropout. The network accepts input with `in_channels` channels, has output with `out_channels` channels, and + hidden layer output channels given in `hidden_channels`. If `bias` is True then linear units have a bias term. + + Args: + in_channels: number of input channels. + out_channels: number of output channels. + hidden_channels: number of output channels for each hidden layer. + dropout: dropout ratio. Defaults to no dropout. + act: activation type and arguments. Defaults to PReLU. + bias: whether to have a bias term in linear units. Defaults to True. + adn_ordering: order of operations in :py:class:`monai.networks.blocks.ADN`. + + Examples:: + + # accepts 4 values and infers 3 values as output, has 3 hidden layers with 10, 20, 10 values as output + net = FullyConnectedNet(4, 3, [10, 20, 10], dropout=0.2) + + """ + + def __init__( + self, + in_channels: int, + out_channels: int, + hidden_channels: Sequence[int], + dropout: Optional[Union[Tuple, str, float]] = None, + act: Optional[Union[Tuple, str]] = Act.PRELU, + bias: bool = True, + adn_ordering: Optional[str] = None, + ) -> None: + """ + Defines a network accept input with `in_channels` channels, output of `out_channels` channels, and hidden layers + with channels given in `hidden_channels`. If `bias` is True then linear units have a bias term. 
+ """ + super().__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.hidden_channels = list(hidden_channels) + self.act = act + self.dropout = dropout + self.adn_ordering = adn_ordering + + self.add_module("flatten", nn.Flatten()) + + prev_channels = self.in_channels + for i, c in enumerate(hidden_channels): + self.add_module("hidden_%i" % i, self._get_layer(prev_channels, c, bias)) + prev_channels = c + + self.add_module("output", nn.Linear(prev_channels, out_channels, bias)) + + def _get_layer(self, in_channels: int, out_channels: int, bias: bool) -> nn.Sequential: + seq = nn.Sequential( + nn.Linear(in_channels, out_channels, bias), _get_adn_layer(self.act, self.dropout, self.adn_ordering) + ) + return seq + + +class VarFullyConnectedNet(nn.Module): + """ + Variational fully-connected network. This is composed of an encode layer, reparameterization layer, and then a + decode layer. + + Args: + in_channels: number of input channels. + out_channels: number of output channels. + latent_size: number of latent variables to use. + encode_channels: number of output channels for each hidden layer of the encode half. + decode_channels: number of output channels for each hidden layer of the decode half. + dropout: dropout ratio. Defaults to no dropout. + act: activation type and arguments. Defaults to PReLU. + bias: whether to have a bias term in linear units. Defaults to True. + adn_ordering: order of operations in :py:class:`monai.networks.blocks.ADN`. + + Examples:: + + # accepts inputs with 4 values, uses a latent space of 2 variables, and produces outputs of 3 values + net = VarFullyConnectedNet(4, 3, 2, [5, 10], [10, 5]) + + """ + + def __init__( + self, + in_channels: int, + out_channels: int, + latent_size: int, + encode_channels: Sequence[int], + decode_channels: Sequence[int], + dropout: Optional[Union[Tuple, str, float]] = None, + act: Optional[Union[Tuple, str]] = Act.PRELU, + bias: bool = True, + adn_ordering: Optional[str] = None, + ) -> None: + super().__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.latent_size = latent_size + + self.encode = nn.Sequential() + self.decode = nn.Sequential() + self.flatten = nn.Flatten() + + self.adn_layer = _get_adn_layer(act, dropout, adn_ordering) + + prev_channels = self.in_channels + for i, c in enumerate(encode_channels): + self.encode.add_module("encode_%i" % i, self._get_layer(prev_channels, c, bias)) + prev_channels = c + + self.mu = nn.Linear(prev_channels, self.latent_size) + self.logvar = nn.Linear(prev_channels, self.latent_size) + self.decodeL = nn.Linear(self.latent_size, prev_channels) + + for i, c in enumerate(decode_channels): + self.decode.add_module("decode%i" % i, self._get_layer(prev_channels, c, bias)) + prev_channels = c + + self.decode.add_module("final", nn.Linear(prev_channels, out_channels, bias)) + + def _get_layer(self, in_channels: int, out_channels: int, bias: bool) -> nn.Sequential: + seq = nn.Sequential(nn.Linear(in_channels, out_channels, bias)) + seq.add_module("ADN", self.adn_layer) + return seq + + def encode_forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: + x = self.encode(x) + x = self.flatten(x) + mu = self.mu(x) + logvar = self.logvar(x) + return mu, logvar + + def decode_forward(self, z: torch.Tensor, use_sigmoid: bool = True) -> torch.Tensor: + x: torch.Tensor + x = self.decodeL(z) + x = torch.relu(x) + x = self.flatten(x) + x = self.decode(x) + if use_sigmoid: + x = torch.sigmoid(x) + return x + + def 
reparameterize(self, mu: torch.Tensor, logvar: torch.Tensor) -> torch.Tensor: + std = torch.exp(0.5 * logvar) + + if self.training: # multiply random noise with std only during training + std = torch.randn_like(std).mul(std) + + return std.add_(mu) + + def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: + mu, logvar = self.encode_forward(x) + z = self.reparameterize(mu, logvar) + return self.decode_forward(z), mu, logvar, z diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/generator.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/generator.py new file mode 100644 index 0000000000000000000000000000000000000000..a69cae4d7b796f021f25b6cc5f8886c0ffd927be --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/generator.py @@ -0,0 +1,149 @@ +# Copyright (c) MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Optional, Sequence, Union + +import numpy as np +import torch +import torch.nn as nn + +from monai.networks.blocks import Convolution, ResidualUnit +from monai.networks.layers.factories import Act, Norm +from monai.networks.layers.simplelayers import Reshape +from monai.utils import ensure_tuple, ensure_tuple_rep + + +class Generator(nn.Module): + """ + Defines a simple generator network accepting a latent vector and through a sequence of convolution layers + constructs an output tensor of greater size and high dimensionality. The method `_get_layer` is used to + create each of these layers, override this method to define layers beyond the default + :py:class:`monai.networks.blocks.Convolution` or :py:class:`monai.networks.blocks.ResidualUnit` layers. + + The layers are constructed using the values in the `channels` and `strides` arguments, the number being defined by + the length of these (which must match). Input is first passed through a :py:class:`torch.nn.Linear` layer to + convert the input vector to an image tensor with dimensions `start_shape`. This passes through the convolution + layers and is progressively upsampled if the `strides` values are greater than 1 using transpose convolutions. The + size of the final output is defined by the `start_shape` dimension and the amount of upsampling done through + strides. In the default definition the size of the output's spatial dimensions will be that of `start_shape` + multiplied by the product of `strides`, thus the example network below upsamples an starting size of (64, 8, 8) + to (1, 64, 64) since its `strides` are (2, 2, 2). 
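Before continuing with the Generator, a minimal sketch (not part of the vendored files) of the VarFullyConnectedNet defined in fullyconnectednet.py above; the KL term and the random reconstruction target are illustrative assumptions for a VAE-style objective.

import torch
from monai.networks.nets import VarFullyConnectedNet

# encode 4 input values to a 2-dimensional latent space, sample with the
# reparameterization trick, then decode back to 3 output values
net = VarFullyConnectedNet(
    in_channels=4, out_channels=3, latent_size=2,
    encode_channels=[5, 10], decode_channels=[10, 5],
)
x = torch.rand(8, 4)
recon, mu, logvar, z = net(x)       # forward returns (reconstruction, mu, logvar, z)

# illustrative VAE-style objective: reconstruction term plus KL divergence term
kl = -0.5 * torch.mean(1 + logvar - mu.pow(2) - logvar.exp())
target = torch.rand(8, 3)           # dummy target, just for the sketch
loss = torch.nn.functional.mse_loss(recon, target) + kl
loss.backward()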
+ + Args: + latent_shape: tuple of integers stating the dimension of the input latent vector (minus batch dimension) + start_shape: tuple of integers stating the dimension of the tensor to pass to convolution subnetwork + channels: tuple of integers stating the output channels of each convolutional layer + strides: tuple of integers stating the stride (upscale factor) of each convolutional layer + kernel_size: integer or tuple of integers stating size of convolutional kernels + num_res_units: integer stating number of convolutions in residual units, 0 means no residual units + act: name or type defining activation layers + norm: name or type defining normalization layers + dropout: optional float value in range [0, 1] stating dropout probability for layers, None for no dropout + bias: boolean stating if convolution layers should have a bias component + + Examples:: + + # 3 layers, latent input vector of shape (42, 24), output volume of shape (1, 64, 64) + net = Generator((42, 24), (64, 8, 8), (32, 16, 1), (2, 2, 2)) + + """ + + def __init__( + self, + latent_shape: Sequence[int], + start_shape: Sequence[int], + channels: Sequence[int], + strides: Sequence[int], + kernel_size: Union[Sequence[int], int] = 3, + num_res_units: int = 2, + act=Act.PRELU, + norm=Norm.INSTANCE, + dropout: Optional[float] = None, + bias: bool = True, + ) -> None: + super().__init__() + + self.in_channels, *self.start_shape = ensure_tuple(start_shape) + self.dimensions = len(self.start_shape) + + self.latent_shape = ensure_tuple(latent_shape) + self.channels = ensure_tuple(channels) + self.strides = ensure_tuple(strides) + self.kernel_size = ensure_tuple_rep(kernel_size, self.dimensions) + self.num_res_units = num_res_units + self.act = act + self.norm = norm + self.dropout = dropout + self.bias = bias + + self.flatten = nn.Flatten() + self.linear = nn.Linear(int(np.prod(self.latent_shape)), int(np.prod(start_shape))) + self.reshape = Reshape(*start_shape) + self.conv = nn.Sequential() + + echannel = self.in_channels + + # transform tensor of shape `start_shape' into output shape through transposed convolutions and residual units + for i, (c, s) in enumerate(zip(channels, strides)): + is_last = i == len(channels) - 1 + layer = self._get_layer(echannel, c, s, is_last) + self.conv.add_module("layer_%i" % i, layer) + echannel = c + + def _get_layer( + self, in_channels: int, out_channels: int, strides: int, is_last: bool + ) -> Union[Convolution, nn.Sequential]: + """ + Returns a layer accepting inputs with `in_channels` number of channels and producing outputs of `out_channels` + number of channels. The `strides` indicates upsampling factor, ie. transpose convolutional stride. If `is_last` + is True this is the final layer and is not expected to include activation and normalization layers. 
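A quick shape check (not part of the vendored generator.py) of the Generator example configuration quoted in the docstring above, assuming the default num_res_units=2 is kept.

import torch
from monai.networks.nets import Generator

# latent vectors of shape (42, 24) are flattened, projected to 64*8*8 values,
# reshaped to (64, 8, 8), then upsampled x2 three times: 8 -> 16 -> 32 -> 64
net = Generator(
    latent_shape=(42, 24),
    start_shape=(64, 8, 8),
    channels=(32, 16, 1),
    strides=(2, 2, 2),
)
latent = torch.rand(5, 42, 24)   # a batch of 5 latent codes
out = net(latent)
print(out.shape)                 # expected: torch.Size([5, 1, 64, 64])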
+ """ + + layer: Union[Convolution, nn.Sequential] + + layer = Convolution( + in_channels=in_channels, + strides=strides, + is_transposed=True, + conv_only=is_last or self.num_res_units > 0, + spatial_dims=self.dimensions, + out_channels=out_channels, + kernel_size=self.kernel_size, + act=self.act, + norm=self.norm, + dropout=self.dropout, + bias=self.bias, + ) + + if self.num_res_units > 0: + ru = ResidualUnit( + in_channels=out_channels, + subunits=self.num_res_units, + last_conv_only=is_last, + spatial_dims=self.dimensions, + out_channels=out_channels, + kernel_size=self.kernel_size, + act=self.act, + norm=self.norm, + dropout=self.dropout, + bias=self.bias, + ) + + layer = nn.Sequential(layer, ru) + + return layer + + def forward(self, x: torch.Tensor) -> torch.Tensor: + x = self.flatten(x) + x = self.linear(x) + x = self.reshape(x) + x = self.conv(x) + return x diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/highresnet.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/highresnet.py new file mode 100644 index 0000000000000000000000000000000000000000..891a65e67b10efdaa04384449dba9976a12c6d00 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/highresnet.py @@ -0,0 +1,224 @@ +# Copyright (c) MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Dict, Optional, Sequence, Tuple, Union + +import torch +import torch.nn as nn + +from monai.networks.blocks import ADN, Convolution +from monai.networks.layers.simplelayers import ChannelPad +from monai.utils import ChannelMatching + +__all__ = ["HighResBlock", "HighResNet"] + +DEFAULT_LAYER_PARAMS_3D = ( + # initial conv layer + {"name": "conv_0", "n_features": 16, "kernel_size": 3}, + # residual blocks + {"name": "res_1", "n_features": 16, "kernels": (3, 3), "repeat": 3}, + {"name": "res_2", "n_features": 32, "kernels": (3, 3), "repeat": 3}, + {"name": "res_3", "n_features": 64, "kernels": (3, 3), "repeat": 3}, + # final conv layers + {"name": "conv_1", "n_features": 80, "kernel_size": 1}, + {"name": "conv_2", "kernel_size": 1}, +) + + +class HighResBlock(nn.Module): + def __init__( + self, + spatial_dims: int, + in_channels: int, + out_channels: int, + kernels: Sequence[int] = (3, 3), + dilation: Union[Sequence[int], int] = 1, + norm_type: Union[Tuple, str] = ("batch", {"affine": True}), + acti_type: Union[Tuple, str] = ("relu", {"inplace": True}), + bias: bool = False, + channel_matching: Union[ChannelMatching, str] = ChannelMatching.PAD, + ) -> None: + """ + Args: + spatial_dims: number of spatial dimensions of the input image. + in_channels: number of input channels. + out_channels: number of output channels. + kernels: each integer k in `kernels` corresponds to a convolution layer with kernel size k. + dilation: spacing between kernel elements. + norm_type: feature normalization type and arguments. + Defaults to ``("batch", {"affine": True})``. 
+ acti_type: {``"relu"``, ``"prelu"``, ``"relu6"``} + Non-linear activation using ReLU or PReLU. Defaults to ``"relu"``. + bias: whether to have a bias term in convolution blocks. Defaults to False. + According to `Performance Tuning Guide `_, + if a conv layer is directly followed by a batch norm layer, bias should be False. + channel_matching: {``"pad"``, ``"project"``} + Specifies handling residual branch and conv branch channel mismatches. Defaults to ``"pad"``. + + - ``"pad"``: with zero padding. + - ``"project"``: with a trainable conv with kernel size one. + + Raises: + ValueError: When ``channel_matching=pad`` and ``in_channels > out_channels``. Incompatible values. + + """ + super().__init__() + self.chn_pad = ChannelPad( + spatial_dims=spatial_dims, in_channels=in_channels, out_channels=out_channels, mode=channel_matching + ) + + layers = nn.ModuleList() + _in_chns, _out_chns = in_channels, out_channels + + for kernel_size in kernels: + layers.append( + ADN(ordering="NA", in_channels=_in_chns, act=acti_type, norm=norm_type, norm_dim=spatial_dims) + ) + layers.append( + Convolution( + spatial_dims=spatial_dims, + in_channels=_in_chns, + out_channels=_out_chns, + kernel_size=kernel_size, + dilation=dilation, + bias=bias, + conv_only=True, + ) + ) + _in_chns = _out_chns + + self.layers = nn.Sequential(*layers) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + x_conv: torch.Tensor = self.layers(x) + return x_conv + torch.as_tensor(self.chn_pad(x)) + + +class HighResNet(nn.Module): + """ + Reimplementation of highres3dnet based on + Li et al., "On the compactness, efficiency, and representation of 3D + convolutional networks: Brain parcellation as a pretext task", IPMI '17 + + Adapted from: + https://github.com/NifTK/NiftyNet/blob/v0.6.0/niftynet/network/highres3dnet.py + https://github.com/fepegar/highresnet + + Args: + spatial_dims: number of spatial dimensions of the input image. + in_channels: number of input channels. + out_channels: number of output channels. + norm_type: feature normalization type and arguments. + Defaults to ``("batch", {"affine": True})``. + acti_type: activation type and arguments. + Defaults to ``("relu", {"inplace": True})``. + dropout_prob: probability of the feature map to be zeroed + (only applies to the penultimate conv layer). + bias: whether to have a bias term in convolution blocks. Defaults to False. + According to `Performance Tuning Guide `_, + if a conv layer is directly followed by a batch norm layer, bias should be False. + layer_params: specifying key parameters of each layer/block. + channel_matching: {``"pad"``, ``"project"``} + Specifies handling residual branch and conv branch channel mismatches. Defaults to ``"pad"``. + + - ``"pad"``: with zero padding. + - ``"project"``: with a trainable conv with kernel size one. 
+ """ + + def __init__( + self, + spatial_dims: int = 3, + in_channels: int = 1, + out_channels: int = 1, + norm_type: Union[str, tuple] = ("batch", {"affine": True}), + acti_type: Union[str, tuple] = ("relu", {"inplace": True}), + dropout_prob: Optional[Union[Tuple, str, float]] = 0.0, + bias: bool = False, + layer_params: Sequence[Dict] = DEFAULT_LAYER_PARAMS_3D, + channel_matching: Union[ChannelMatching, str] = ChannelMatching.PAD, + ) -> None: + + super().__init__() + blocks = nn.ModuleList() + + # initial conv layer + params = layer_params[0] + _in_chns, _out_chns = in_channels, params["n_features"] + blocks.append( + Convolution( + spatial_dims=spatial_dims, + in_channels=_in_chns, + out_channels=_out_chns, + kernel_size=params["kernel_size"], + adn_ordering="NA", + act=acti_type, + norm=norm_type, + bias=bias, + ) + ) + + # residual blocks + for (idx, params) in enumerate(layer_params[1:-2]): # res blocks except the 1st and last two conv layers. + _in_chns, _out_chns = _out_chns, params["n_features"] + _dilation = 2**idx + for _ in range(params["repeat"]): + blocks.append( + HighResBlock( + spatial_dims=spatial_dims, + in_channels=_in_chns, + out_channels=_out_chns, + kernels=params["kernels"], + dilation=_dilation, + norm_type=norm_type, + acti_type=acti_type, + bias=bias, + channel_matching=channel_matching, + ) + ) + _in_chns = _out_chns + + # final conv layers + params = layer_params[-2] + _in_chns, _out_chns = _out_chns, params["n_features"] + blocks.append( + Convolution( + spatial_dims=spatial_dims, + in_channels=_in_chns, + out_channels=_out_chns, + kernel_size=params["kernel_size"], + adn_ordering="NAD", + act=acti_type, + norm=norm_type, + bias=bias, + dropout=dropout_prob, + ) + ) + + params = layer_params[-1] + _in_chns = _out_chns + blocks.append( + Convolution( + spatial_dims=spatial_dims, + in_channels=_in_chns, + out_channels=out_channels, + kernel_size=params["kernel_size"], + adn_ordering="NAD", + act=acti_type, + norm=norm_type, + bias=bias, + dropout=dropout_prob, + ) + ) + + self.blocks = nn.Sequential(*blocks) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + return torch.as_tensor(self.blocks(x)) diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/milmodel.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/milmodel.py new file mode 100644 index 0000000000000000000000000000000000000000..2f4afaffbe4c74c84491e05959af9d9c0917a4af --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/milmodel.py @@ -0,0 +1,244 @@ +# Copyright (c) MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Dict, Optional, Union, cast + +import torch +import torch.nn as nn + +from monai.utils.module import optional_import + +models, _ = optional_import("torchvision.models") + + +class MILModel(nn.Module): + """ + Multiple Instance Learning (MIL) model, with a backbone classification model. 
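Returning briefly to highresnet.py above: a minimal sketch (not part of the vendored files) showing that the dilated residual blocks preserve spatial size, so the segmentation output matches the input volume shape; the toy sizes are arbitrary.

import torch
from monai.networks.nets import HighResNet

# 3D parcellation-style network with the default layer parameters
net = HighResNet(spatial_dims=3, in_channels=1, out_channels=4)
volume = torch.rand(1, 1, 32, 32, 32)
logits = net(volume)
print(logits.shape)   # expected: torch.Size([1, 4, 32, 32, 32])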
+ Currently, it only works for 2D images, a typical use case is for classification of the + digital pathology whole slide images. The expected shape of input data is `[B, N, C, H, W]`, + where `B` is the batch_size of PyTorch Dataloader and `N` is the number of instances + extracted from every original image in the batch. A tutorial example is available at: + https://github.com/Project-MONAI/tutorials/tree/master/pathology/multiple_instance_learning. + + Args: + num_classes: number of output classes. + mil_mode: MIL algorithm, available values (Defaults to ``"att"``): + + - ``"mean"`` - average features from all instances, equivalent to pure CNN (non MIL). + - ``"max"`` - retain only the instance with the max probability for loss calculation. + - ``"att"`` - attention based MIL https://arxiv.org/abs/1802.04712. + - ``"att_trans"`` - transformer MIL https://arxiv.org/abs/2111.01556. + - ``"att_trans_pyramid"`` - transformer pyramid MIL https://arxiv.org/abs/2111.01556. + + pretrained: init backbone with pretrained weights, defaults to ``True``. + backbone: Backbone classifier CNN (either ``None``, a ``nn.Module`` that returns features, + or a string name of a torchvision model). + Defaults to ``None``, in which case ResNet50 is used. + backbone_num_features: Number of output features of the backbone CNN + Defaults to ``None`` (necessary only when using a custom backbone) + trans_blocks: number of the blocks in `TransformEncoder` layer. + trans_dropout: dropout rate in `TransformEncoder` layer. + + """ + + def __init__( + self, + num_classes: int, + mil_mode: str = "att", + pretrained: bool = True, + backbone: Optional[Union[str, nn.Module]] = None, + backbone_num_features: Optional[int] = None, + trans_blocks: int = 4, + trans_dropout: float = 0.0, + ) -> None: + + super().__init__() + + if num_classes <= 0: + raise ValueError("Number of classes must be positive: " + str(num_classes)) + + if mil_mode.lower() not in ["mean", "max", "att", "att_trans", "att_trans_pyramid"]: + raise ValueError("Unsupported mil_mode: " + str(mil_mode)) + + self.mil_mode = mil_mode.lower() + self.attention = nn.Sequential() + self.transformer = None # type: Optional[nn.Module] + + if backbone is None: + + net = models.resnet50(pretrained=pretrained) + nfc = net.fc.in_features # save the number of final features + net.fc = torch.nn.Identity() # remove final linear layer + + self.extra_outputs = {} # type: Dict[str, torch.Tensor] + + if mil_mode == "att_trans_pyramid": + # register hooks to capture outputs of intermediate layers + def forward_hook(layer_name): + def hook(module, input, output): + self.extra_outputs[layer_name] = output + + return hook + + net.layer1.register_forward_hook(forward_hook("layer1")) + net.layer2.register_forward_hook(forward_hook("layer2")) + net.layer3.register_forward_hook(forward_hook("layer3")) + net.layer4.register_forward_hook(forward_hook("layer4")) + + elif isinstance(backbone, str): + + # assume torchvision model string is provided + torch_model = getattr(models, backbone, None) + if torch_model is None: + raise ValueError("Unknown torch vision model" + str(backbone)) + net = torch_model(pretrained=pretrained) + + if getattr(net, "fc", None) is not None: + nfc = net.fc.in_features # save the number of final features + net.fc = torch.nn.Identity() # remove final linear layer + else: + raise ValueError( + "Unable to detect FC layer for the torchvision model " + str(backbone), + ". 
Please initialize the backbone model manually.", + ) + + elif isinstance(backbone, nn.Module): + # use a custom backbone + net = backbone + nfc = backbone_num_features + + if backbone_num_features is None: + raise ValueError("Number of endencoder features must be provided for a custom backbone model") + + else: + raise ValueError("Unsupported backbone") + + if backbone is not None and mil_mode not in ["mean", "max", "att", "att_trans"]: + raise ValueError("Custom backbone is not supported for the mode:" + str(mil_mode)) + + if self.mil_mode in ["mean", "max"]: + pass + elif self.mil_mode == "att": + self.attention = nn.Sequential(nn.Linear(nfc, 2048), nn.Tanh(), nn.Linear(2048, 1)) + + elif self.mil_mode == "att_trans": + transformer = nn.TransformerEncoderLayer(d_model=nfc, nhead=8, dropout=trans_dropout) + self.transformer = nn.TransformerEncoder(transformer, num_layers=trans_blocks) + self.attention = nn.Sequential(nn.Linear(nfc, 2048), nn.Tanh(), nn.Linear(2048, 1)) + + elif self.mil_mode == "att_trans_pyramid": + + transformer_list = nn.ModuleList( + [ + nn.TransformerEncoder( + nn.TransformerEncoderLayer(d_model=256, nhead=8, dropout=trans_dropout), num_layers=trans_blocks + ), + nn.Sequential( + nn.Linear(768, 256), + nn.TransformerEncoder( + nn.TransformerEncoderLayer(d_model=256, nhead=8, dropout=trans_dropout), + num_layers=trans_blocks, + ), + ), + nn.Sequential( + nn.Linear(1280, 256), + nn.TransformerEncoder( + nn.TransformerEncoderLayer(d_model=256, nhead=8, dropout=trans_dropout), + num_layers=trans_blocks, + ), + ), + nn.TransformerEncoder( + nn.TransformerEncoderLayer(d_model=2304, nhead=8, dropout=trans_dropout), + num_layers=trans_blocks, + ), + ] + ) + self.transformer = transformer_list + nfc = nfc + 256 + self.attention = nn.Sequential(nn.Linear(nfc, 2048), nn.Tanh(), nn.Linear(2048, 1)) + + else: + raise ValueError("Unsupported mil_mode: " + str(mil_mode)) + + self.myfc = nn.Linear(nfc, num_classes) + self.net = net + + def calc_head(self, x: torch.Tensor) -> torch.Tensor: + + sh = x.shape + + if self.mil_mode == "mean": + x = self.myfc(x) + x = torch.mean(x, dim=1) + + elif self.mil_mode == "max": + x = self.myfc(x) + x, _ = torch.max(x, dim=1) + + elif self.mil_mode == "att": + + a = self.attention(x) + a = torch.softmax(a, dim=1) + x = torch.sum(x * a, dim=1) + + x = self.myfc(x) + + elif self.mil_mode == "att_trans" and self.transformer is not None: + + x = x.permute(1, 0, 2) + x = self.transformer(x) + x = x.permute(1, 0, 2) + + a = self.attention(x) + a = torch.softmax(a, dim=1) + x = torch.sum(x * a, dim=1) + + x = self.myfc(x) + + elif self.mil_mode == "att_trans_pyramid" and self.transformer is not None: + + l1 = torch.mean(self.extra_outputs["layer1"], dim=(2, 3)).reshape(sh[0], sh[1], -1).permute(1, 0, 2) + l2 = torch.mean(self.extra_outputs["layer2"], dim=(2, 3)).reshape(sh[0], sh[1], -1).permute(1, 0, 2) + l3 = torch.mean(self.extra_outputs["layer3"], dim=(2, 3)).reshape(sh[0], sh[1], -1).permute(1, 0, 2) + l4 = torch.mean(self.extra_outputs["layer4"], dim=(2, 3)).reshape(sh[0], sh[1], -1).permute(1, 0, 2) + + transformer_list = cast(nn.ModuleList, self.transformer) + + x = transformer_list[0](l1) + x = transformer_list[1](torch.cat((x, l2), dim=2)) + x = transformer_list[2](torch.cat((x, l3), dim=2)) + x = transformer_list[3](torch.cat((x, l4), dim=2)) + + x = x.permute(1, 0, 2) + + a = self.attention(x) + a = torch.softmax(a, dim=1) + x = torch.sum(x * a, dim=1) + + x = self.myfc(x) + + else: + raise ValueError("Wrong model mode" + str(self.mil_mode)) 
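A minimal sketch (not part of the vendored milmodel.py) of the bag layout `[B, N, C, H, W]` described in the MILModel docstring above, assuming torchvision is available; pretrained=False keeps the ResNet50 backbone randomly initialised so no download is needed.

import torch
from monai.networks.nets import MILModel

# a "bag" of N=4 instances (e.g. patches from one whole-slide image) per sample
net = MILModel(num_classes=3, mil_mode="att", pretrained=False)
bag = torch.rand(2, 4, 3, 224, 224)   # (B, N, C, H, W)
logits = net(bag)
print(logits.shape)                   # expected: torch.Size([2, 3])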
+ + return x + + def forward(self, x: torch.Tensor, no_head: bool = False) -> torch.Tensor: + + sh = x.shape + x = x.reshape(sh[0] * sh[1], sh[2], sh[3], sh[4]) + + x = self.net(x) + x = x.reshape(sh[0], sh[1], -1) + + if not no_head: + x = self.calc_head(x) + + return x diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/netadapter.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/netadapter.py new file mode 100644 index 0000000000000000000000000000000000000000..425c1d5820893273d37244fad5fa1ce97a131983 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/netadapter.py @@ -0,0 +1,103 @@ +# Copyright (c) MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Any, Dict, Optional, Tuple, Union + +import torch + +from monai.networks.layers import Conv, get_pool_layer +from monai.utils import deprecated_arg + + +class NetAdapter(torch.nn.Module): + """ + Wrapper to replace the last layer of model by convolutional layer or FC layer. + This module expects the output of `model layers[0: -2]` is a feature map with shape [B, C, spatial dims], + then replace the model's last two layers with an optional `pooling` and a `conv` or `linear` layer. + + Args: + model: a PyTorch model, which can be both 2D and 3D models. typically, it can be a pretrained model + in Torchvision, like: ``resnet18``, ``resnet34``, ``resnet50``, ``resnet101``, ``resnet152``, etc. + more details: https://pytorch.org/vision/stable/models.html. + num_classes: number of classes for the last classification layer. Default to 1. + dim: number of supported spatial dimensions in the specified model, depends on the model implementation. + default to 2 as most Torchvision models are for 2D image processing. + in_channels: number of the input channels of last layer. if None, get it from `in_features` of last layer. + use_conv: whether use convolutional layer to replace the last layer, default to False. + pool: parameters for the pooling layer, it should be a tuple, the first item is name of the pooling layer, + the second item is dictionary of the initialization args. if None, will not replace the `layers[-2]`. + default to `("avg", {"kernel_size": 7, "stride": 1})`. + bias: the bias value when replacing the last layer. if False, the layer will not learn an additive bias, + default to True. + + .. deprecated:: 0.6.0 + ``n_classes`` is deprecated, use ``num_classes`` instead. 
+ + """ + + @deprecated_arg("n_classes", since="0.6") + def __init__( + self, + model: torch.nn.Module, + num_classes: int = 1, + dim: int = 2, + in_channels: Optional[int] = None, + use_conv: bool = False, + pool: Optional[Tuple[str, Dict[str, Any]]] = ("avg", {"kernel_size": 7, "stride": 1}), + bias: bool = True, + n_classes: Optional[int] = None, + ): + super().__init__() + # in case the new num_classes is default but you still call deprecated n_classes + if n_classes is not None and num_classes == 1: + num_classes = n_classes + layers = list(model.children()) + orig_fc = layers[-1] + in_channels_: int + + if in_channels is None: + if not hasattr(orig_fc, "in_features"): + raise ValueError("please specify the input channels of last layer with arg `in_channels`.") + in_channels_ = orig_fc.in_features # type: ignore + else: + in_channels_ = in_channels + + if pool is None: + # remove the last layer + self.features = torch.nn.Sequential(*layers[:-1]) + self.pool = None + else: + # remove the last 2 layers + self.features = torch.nn.Sequential(*layers[:-2]) + self.pool = get_pool_layer(name=pool, spatial_dims=dim) + + self.fc: Union[torch.nn.Linear, torch.nn.Conv2d, torch.nn.Conv3d] + if use_conv: + # add 1x1 conv (it behaves like a FC layer) + self.fc = Conv[Conv.CONV, dim](in_channels=in_channels_, out_channels=num_classes, kernel_size=1, bias=bias) + else: + # remove the last Linear layer (fully connected) + self.features = torch.nn.Sequential(*layers[:-1]) + # replace the out_features of FC layer + self.fc = torch.nn.Linear(in_features=in_channels_, out_features=num_classes, bias=bias) + self.use_conv = use_conv + + def forward(self, x): + x = self.features(x) + if self.pool is not None: + x = self.pool(x) + + if not self.use_conv: + x = torch.flatten(x, 1) + + x = self.fc(x) + + return x diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/regressor.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/regressor.py new file mode 100644 index 0000000000000000000000000000000000000000..0a1e6258a9a01cfd78422e6aba910aa5fcd1bed3 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/regressor.py @@ -0,0 +1,151 @@ +# Copyright (c) MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Optional, Sequence, Union + +import numpy as np +import torch +import torch.nn as nn + +from monai.networks.blocks import Convolution, ResidualUnit +from monai.networks.layers.convutils import calculate_out_shape, same_padding +from monai.networks.layers.factories import Act, Norm +from monai.networks.layers.simplelayers import Reshape +from monai.utils import ensure_tuple, ensure_tuple_rep + +__all__ = ["Regressor"] + + +class Regressor(nn.Module): + """ + This defines a network for relating large-sized input tensors to small output tensors, ie. regressing large + values to a prediction. 
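Before the Regressor details, a minimal sketch (not part of the vendored files) of the NetAdapter defined in netadapter.py above; `pool=None` is assumed here so the backbone's own global pooling is reused and only the final linear head is replaced.

import torch
from torchvision.models import resnet18
from monai.networks.nets import NetAdapter

# reuse a 2D torchvision backbone and swap its 1000-class head for 3 classes
backbone = resnet18(pretrained=False)
net = NetAdapter(model=backbone, num_classes=3, dim=2, pool=None, use_conv=False)
out = net(torch.rand(2, 3, 224, 224))
print(out.shape)   # expected: torch.Size([2, 3])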
An output of a single dimension can be used as value regression or multi-label + classification prediction, an output of a single value can be used as a discriminator or critic prediction. + + The network is constructed as a sequence of layers, either :py:class:`monai.networks.blocks.Convolution` or + :py:class:`monai.networks.blocks.ResidualUnit`, with a final fully-connected layer resizing the output from the + blocks to the final size. Each block is defined with a stride value typically used to downsample the input using + strided convolutions. In this way each block progressively condenses information from the input into a deep + representation the final fully-connected layer relates to a final result. + + Args: + in_shape: tuple of integers stating the dimension of the input tensor (minus batch dimension) + out_shape: tuple of integers stating the dimension of the final output tensor (minus batch dimension) + channels: tuple of integers stating the output channels of each convolutional layer + strides: tuple of integers stating the stride (downscale factor) of each convolutional layer + kernel_size: integer or tuple of integers stating size of convolutional kernels + num_res_units: integer stating number of convolutions in residual units, 0 means no residual units + act: name or type defining activation layers + norm: name or type defining normalization layers + dropout: optional float value in range [0, 1] stating dropout probability for layers, None for no dropout + bias: boolean stating if convolution layers should have a bias component + + Examples:: + + # infers a 2-value result (eg. a 2D cartesian coordinate) from a 64x64 image + net = Regressor((1, 64, 64), (2,), (2, 4, 8), (2, 2, 2)) + + """ + + def __init__( + self, + in_shape: Sequence[int], + out_shape: Sequence[int], + channels: Sequence[int], + strides: Sequence[int], + kernel_size: Union[Sequence[int], int] = 3, + num_res_units: int = 2, + act=Act.PRELU, + norm=Norm.INSTANCE, + dropout: Optional[float] = None, + bias: bool = True, + ) -> None: + super().__init__() + + self.in_channels, *self.in_shape = ensure_tuple(in_shape) + self.dimensions = len(self.in_shape) + self.channels = ensure_tuple(channels) + self.strides = ensure_tuple(strides) + self.out_shape = ensure_tuple(out_shape) + self.kernel_size = ensure_tuple_rep(kernel_size, self.dimensions) + self.num_res_units = num_res_units + self.act = act + self.norm = norm + self.dropout = dropout + self.bias = bias + self.net = nn.Sequential() + + echannel = self.in_channels + + padding = same_padding(kernel_size) + + self.final_size = np.asarray(self.in_shape, dtype=int) + self.reshape = Reshape(*self.out_shape) + + # encode stage + for i, (c, s) in enumerate(zip(self.channels, self.strides)): + layer = self._get_layer(echannel, c, s, i == len(channels) - 1) + echannel = c # use the output channel number as the input for the next loop + self.net.add_module("layer_%i" % i, layer) + self.final_size = calculate_out_shape(self.final_size, kernel_size, s, padding) # type: ignore + + self.final = self._get_final_layer((echannel,) + self.final_size) + + def _get_layer( + self, in_channels: int, out_channels: int, strides: int, is_last: bool + ) -> Union[ResidualUnit, Convolution]: + """ + Returns a layer accepting inputs with `in_channels` number of channels and producing outputs of `out_channels` + number of channels. The `strides` indicates downsampling factor, ie. convolutional stride. 
If `is_last` + is True this is the final layer and is not expected to include activation and normalization layers. + """ + + layer: Union[ResidualUnit, Convolution] + + if self.num_res_units > 0: + layer = ResidualUnit( + subunits=self.num_res_units, + last_conv_only=is_last, + spatial_dims=self.dimensions, + in_channels=in_channels, + out_channels=out_channels, + strides=strides, + kernel_size=self.kernel_size, + act=self.act, + norm=self.norm, + dropout=self.dropout, + bias=self.bias, + ) + else: + layer = Convolution( + conv_only=is_last, + spatial_dims=self.dimensions, + in_channels=in_channels, + out_channels=out_channels, + strides=strides, + kernel_size=self.kernel_size, + act=self.act, + norm=self.norm, + dropout=self.dropout, + bias=self.bias, + ) + + return layer + + def _get_final_layer(self, in_shape: Sequence[int]): + linear = nn.Linear(int(np.product(in_shape)), int(np.product(self.out_shape))) + return nn.Sequential(nn.Flatten(), linear) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + x = self.net(x) + x = self.final(x) + x = self.reshape(x) + return x diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/regunet.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/regunet.py new file mode 100644 index 0000000000000000000000000000000000000000..6776c7ce9e67acd3afbad7db3edd21b2f28e13ae --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/regunet.py @@ -0,0 +1,418 @@ +# Copyright (c) MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import List, Optional, Tuple, Union + +import torch +from torch import nn +from torch.nn import functional as F + +from monai.networks.blocks.regunet_block import ( + RegistrationDownSampleBlock, + RegistrationExtractionBlock, + RegistrationResidualConvBlock, + get_conv_block, + get_deconv_block, +) +from monai.networks.utils import meshgrid_ij + +__all__ = ["RegUNet", "AffineHead", "GlobalNet", "LocalNet"] + + +class RegUNet(nn.Module): + """ + Class that implements an adapted UNet. This class also serve as the parent class of LocalNet and GlobalNet + + Reference: + O. Ronneberger, P. Fischer, and T. Brox, + “U-net: Convolutional networks for biomedical image segmentation,”, + Lecture Notes in Computer Science, 2015, vol. 9351, pp. 234–241. 
+ https://arxiv.org/abs/1505.04597 + + Adapted from: + DeepReg (https://github.com/DeepRegNet/DeepReg) + """ + + def __init__( + self, + spatial_dims: int, + in_channels: int, + num_channel_initial: int, + depth: int, + out_kernel_initializer: Optional[str] = "kaiming_uniform", + out_activation: Optional[str] = None, + out_channels: int = 3, + extract_levels: Optional[Tuple[int]] = None, + pooling: bool = True, + concat_skip: bool = False, + encode_kernel_sizes: Union[int, List[int]] = 3, + ): + """ + Args: + spatial_dims: number of spatial dims + in_channels: number of input channels + num_channel_initial: number of initial channels + depth: input is at level 0, bottom is at level depth. + out_kernel_initializer: kernel initializer for the last layer + out_activation: activation at the last layer + out_channels: number of channels for the output + extract_levels: list, which levels from net to extract. The maximum level must equal to ``depth`` + pooling: for down-sampling, use non-parameterized pooling if true, otherwise use conv3d + concat_skip: when up-sampling, concatenate skipped tensor if true, otherwise use addition + encode_kernel_sizes: kernel size for down-sampling + """ + super().__init__() + if not extract_levels: + extract_levels = (depth,) + if max(extract_levels) != depth: + raise AssertionError + + # save parameters + self.spatial_dims = spatial_dims + self.in_channels = in_channels + self.num_channel_initial = num_channel_initial + self.depth = depth + self.out_kernel_initializer = out_kernel_initializer + self.out_activation = out_activation + self.out_channels = out_channels + self.extract_levels = extract_levels + self.pooling = pooling + self.concat_skip = concat_skip + + if isinstance(encode_kernel_sizes, int): + encode_kernel_sizes = [encode_kernel_sizes] * (self.depth + 1) + if len(encode_kernel_sizes) != self.depth + 1: + raise AssertionError + self.encode_kernel_sizes: List[int] = encode_kernel_sizes + + self.num_channels = [self.num_channel_initial * (2**d) for d in range(self.depth + 1)] + self.min_extract_level = min(self.extract_levels) + + # init layers + # all lists start with d = 0 + self.encode_convs = None + self.encode_pools = None + self.bottom_block = None + self.decode_deconvs = None + self.decode_convs = None + self.output_block = None + + # build layers + self.build_layers() + + def build_layers(self): + self.build_encode_layers() + self.build_decode_layers() + + def build_encode_layers(self): + # encoding / down-sampling + self.encode_convs = nn.ModuleList( + [ + self.build_conv_block( + in_channels=self.in_channels if d == 0 else self.num_channels[d - 1], + out_channels=self.num_channels[d], + kernel_size=self.encode_kernel_sizes[d], + ) + for d in range(self.depth) + ] + ) + self.encode_pools = nn.ModuleList( + [self.build_down_sampling_block(channels=self.num_channels[d]) for d in range(self.depth)] + ) + self.bottom_block = self.build_bottom_block( + in_channels=self.num_channels[-2], out_channels=self.num_channels[-1] + ) + + def build_conv_block(self, in_channels, out_channels, kernel_size): + return nn.Sequential( + get_conv_block( + spatial_dims=self.spatial_dims, + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + ), + RegistrationResidualConvBlock( + spatial_dims=self.spatial_dims, + in_channels=out_channels, + out_channels=out_channels, + kernel_size=kernel_size, + ), + ) + + def build_down_sampling_block(self, channels: int): + return RegistrationDownSampleBlock(spatial_dims=self.spatial_dims, 
channels=channels, pooling=self.pooling) + + def build_bottom_block(self, in_channels: int, out_channels: int): + kernel_size = self.encode_kernel_sizes[self.depth] + return nn.Sequential( + get_conv_block( + spatial_dims=self.spatial_dims, + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + ), + RegistrationResidualConvBlock( + spatial_dims=self.spatial_dims, + in_channels=out_channels, + out_channels=out_channels, + kernel_size=kernel_size, + ), + ) + + def build_decode_layers(self): + # decoding / up-sampling + # [depth - 1, depth - 2, ..., min_extract_level] + self.decode_deconvs = nn.ModuleList( + [ + self.build_up_sampling_block(in_channels=self.num_channels[d + 1], out_channels=self.num_channels[d]) + for d in range(self.depth - 1, self.min_extract_level - 1, -1) + ] + ) + self.decode_convs = nn.ModuleList( + [ + self.build_conv_block( + in_channels=(2 * self.num_channels[d] if self.concat_skip else self.num_channels[d]), + out_channels=self.num_channels[d], + kernel_size=3, + ) + for d in range(self.depth - 1, self.min_extract_level - 1, -1) + ] + ) + + # extraction + self.output_block = self.build_output_block() + + def build_up_sampling_block(self, in_channels: int, out_channels: int) -> nn.Module: + return get_deconv_block(spatial_dims=self.spatial_dims, in_channels=in_channels, out_channels=out_channels) + + def build_output_block(self) -> nn.Module: + return RegistrationExtractionBlock( + spatial_dims=self.spatial_dims, + extract_levels=self.extract_levels, + num_channels=self.num_channels, + out_channels=self.out_channels, + kernel_initializer=self.out_kernel_initializer, + activation=self.out_activation, + ) + + def forward(self, x): + """ + Args: + x: Tensor in shape (batch, ``in_channels``, insize_1, insize_2, [insize_3]) + + Returns: + Tensor in shape (batch, ``out_channels``, insize_1, insize_2, [insize_3]), with the same spatial size as ``x`` + """ + image_size = x.shape[2:] + skips = [] # [0, ..., depth - 1] + encoded = x + for encode_conv, encode_pool in zip(self.encode_convs, self.encode_pools): + skip = encode_conv(encoded) + encoded = encode_pool(skip) + skips.append(skip) + decoded = self.bottom_block(encoded) + + outs = [decoded] + + # [depth - 1, ..., min_extract_level] + for i, (decode_deconv, decode_conv) in enumerate(zip(self.decode_deconvs, self.decode_convs)): + # [depth - 1, depth - 2, ..., min_extract_level] + decoded = decode_deconv(decoded) + if self.concat_skip: + decoded = torch.cat([decoded, skips[-i - 1]], dim=1) + else: + decoded = decoded + skips[-i - 1] + decoded = decode_conv(decoded) + outs.append(decoded) + + out = self.output_block(outs, image_size=image_size) + return out + + +class AffineHead(nn.Module): + def __init__(self, spatial_dims: int, image_size: List[int], decode_size: List[int], in_channels: int): + super().__init__() + self.spatial_dims = spatial_dims + if spatial_dims == 2: + in_features = in_channels * decode_size[0] * decode_size[1] + out_features = 6 + out_init = torch.tensor([1, 0, 0, 0, 1, 0], dtype=torch.float) + elif spatial_dims == 3: + in_features = in_channels * decode_size[0] * decode_size[1] * decode_size[2] + out_features = 12 + out_init = torch.tensor([1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0], dtype=torch.float) + else: + raise ValueError(f"only support 2D/3D operation, got spatial_dims={spatial_dims}") + + self.fc = nn.Linear(in_features=in_features, out_features=out_features) + self.grid = self.get_reference_grid(image_size) # (spatial_dims, ...) 
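A minimal RegUNet forward pass (not part of the vendored regunet.py); the channel counts and image size are arbitrary assumptions, the only requirement being that each spatial dimension can be halved `depth` times.

import torch
from monai.networks.nets import RegUNet

# moving + fixed image concatenated channel-wise as a 2-channel input; the output
# keeps the input spatial size and has `out_channels` channels
net = RegUNet(spatial_dims=3, in_channels=2, num_channel_initial=8, depth=2, out_channels=3)
pair = torch.rand(1, 2, 32, 32, 32)
ddf = net(pair)
print(ddf.shape)   # expected: torch.Size([1, 3, 32, 32, 32])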
+ + # init weight/bias + self.fc.weight.data.zero_() + self.fc.bias.data.copy_(out_init) + + @staticmethod + def get_reference_grid(image_size: Union[Tuple[int], List[int]]) -> torch.Tensor: + mesh_points = [torch.arange(0, dim) for dim in image_size] + grid = torch.stack(meshgrid_ij(*mesh_points), dim=0) # (spatial_dims, ...) + return grid.to(dtype=torch.float) + + def affine_transform(self, theta: torch.Tensor): + # (spatial_dims, ...) -> (spatial_dims + 1, ...) + grid_padded = torch.cat([self.grid, torch.ones_like(self.grid[:1])]) + + # grid_warped[b,p,...] = sum_over_q(grid_padded[q,...] * theta[b,p,q] + if self.spatial_dims == 2: + grid_warped = torch.einsum("qij,bpq->bpij", grid_padded, theta.reshape(-1, 2, 3)) + elif self.spatial_dims == 3: + grid_warped = torch.einsum("qijk,bpq->bpijk", grid_padded, theta.reshape(-1, 3, 4)) + else: + raise ValueError(f"do not support spatial_dims={self.spatial_dims}") + return grid_warped + + def forward(self, x: List[torch.Tensor], image_size: List[int]) -> torch.Tensor: + f = x[0] + self.grid = self.grid.to(device=f.device) + theta = self.fc(f.reshape(f.shape[0], -1)) + out: torch.Tensor = self.affine_transform(theta) - self.grid + return out + + +class GlobalNet(RegUNet): + """ + Build GlobalNet for image registration. + + Reference: + Hu, Yipeng, et al. + "Label-driven weakly-supervised learning + for multimodal deformable image registration," + https://arxiv.org/abs/1711.01666 + """ + + def __init__( + self, + image_size: List[int], + spatial_dims: int, + in_channels: int, + num_channel_initial: int, + depth: int, + out_kernel_initializer: Optional[str] = "kaiming_uniform", + out_activation: Optional[str] = None, + pooling: bool = True, + concat_skip: bool = False, + encode_kernel_sizes: Union[int, List[int]] = 3, + ): + for size in image_size: + if size % (2**depth) != 0: + raise ValueError( + f"given depth {depth}, " + f"all input spatial dimension must be divisible by {2 ** depth}, " + f"got input of size {image_size}" + ) + self.image_size = image_size + self.decode_size = [size // (2**depth) for size in image_size] + super().__init__( + spatial_dims=spatial_dims, + in_channels=in_channels, + num_channel_initial=num_channel_initial, + depth=depth, + out_kernel_initializer=out_kernel_initializer, + out_activation=out_activation, + out_channels=spatial_dims, + pooling=pooling, + concat_skip=concat_skip, + encode_kernel_sizes=encode_kernel_sizes, + ) + + def build_output_block(self): + return AffineHead( + spatial_dims=self.spatial_dims, + image_size=self.image_size, + decode_size=self.decode_size, + in_channels=self.num_channels[-1], + ) + + +class AdditiveUpSampleBlock(nn.Module): + def __init__(self, spatial_dims: int, in_channels: int, out_channels: int): + super().__init__() + self.deconv = get_deconv_block(spatial_dims=spatial_dims, in_channels=in_channels, out_channels=out_channels) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + output_size = (size * 2 for size in x.shape[2:]) + deconved = self.deconv(x) + resized = F.interpolate(x, output_size) + resized = torch.sum(torch.stack(resized.split(split_size=resized.shape[1] // 2, dim=1), dim=-1), dim=-1) + out: torch.Tensor = deconved + resized + return out + + +class LocalNet(RegUNet): + """ + Reimplementation of LocalNet, based on: + `Weakly-supervised convolutional neural networks for multimodal image registration + `_. + `Label-driven weakly-supervised learning for multimodal deformable image registration + `_. 
+ + Adapted from: + DeepReg (https://github.com/DeepRegNet/DeepReg) + """ + + def __init__( + self, + spatial_dims: int, + in_channels: int, + num_channel_initial: int, + extract_levels: Tuple[int], + out_kernel_initializer: Optional[str] = "kaiming_uniform", + out_activation: Optional[str] = None, + out_channels: int = 3, + pooling: bool = True, + concat_skip: bool = False, + ): + """ + Args: + spatial_dims: number of spatial dims + in_channels: number of input channels + num_channel_initial: number of initial channels + out_kernel_initializer: kernel initializer for the last layer + out_activation: activation at the last layer + out_channels: number of channels for the output + extract_levels: list, which levels from net to extract. The maximum level must equal to ``depth`` + pooling: for down-sampling, use non-parameterized pooling if true, otherwise use conv3d + concat_skip: when up-sampling, concatenate skipped tensor if true, otherwise use addition + """ + super().__init__( + spatial_dims=spatial_dims, + in_channels=in_channels, + num_channel_initial=num_channel_initial, + depth=max(extract_levels), + out_kernel_initializer=out_kernel_initializer, + out_activation=out_activation, + out_channels=out_channels, + pooling=pooling, + concat_skip=concat_skip, + encode_kernel_sizes=[7] + [3] * max(extract_levels), + ) + + def build_bottom_block(self, in_channels: int, out_channels: int): + kernel_size = self.encode_kernel_sizes[self.depth] + return get_conv_block( + spatial_dims=self.spatial_dims, in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size + ) + + def build_up_sampling_block(self, in_channels: int, out_channels: int) -> nn.Module: + if self._use_additive_upsampling: + return AdditiveUpSampleBlock( + spatial_dims=self.spatial_dims, in_channels=in_channels, out_channels=out_channels + ) + + return get_deconv_block(spatial_dims=self.spatial_dims, in_channels=in_channels, out_channels=out_channels) diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/segresnet.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/segresnet.py new file mode 100644 index 0000000000000000000000000000000000000000..299f1ca811a9a0c865e12c77f40d182bb5ea96c4 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/segresnet.py @@ -0,0 +1,337 @@ +# Copyright (c) MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
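A minimal, hypothetical usage sketch for the registration networks defined in regunet.py above (the import path simply mirrors the file location and the tensor shapes are illustrative). GlobalNet is shown because its constructor and forward pass follow directly from the code above; note that LocalNet, as written here, reads `self._use_additive_upsampling` in `build_up_sampling_block` without ever assigning it in `__init__`.

    import torch
    from monai.networks.nets.regunet import GlobalNet

    # moving and fixed images are typically concatenated along the channel axis, hence in_channels=2
    pair = torch.rand(1, 2, 64, 64, 64)

    net = GlobalNet(
        image_size=[64, 64, 64],   # every spatial size must be divisible by 2**depth
        spatial_dims=3,
        in_channels=2,
        num_channel_initial=16,
        depth=3,
    )
    ddf = net(pair)  # dense displacement field, shape (1, 3, 64, 64, 64); all zeros at initialisation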
+ +from typing import List, Optional, Sequence, Tuple, Union + +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F + +from monai.networks.blocks.segresnet_block import ResBlock, get_conv_layer, get_upsample_layer +from monai.networks.layers.factories import Dropout +from monai.networks.layers.utils import get_act_layer, get_norm_layer +from monai.utils import UpsampleMode + +__all__ = ["SegResNet", "SegResNetVAE"] + + +class SegResNet(nn.Module): + """ + SegResNet based on `3D MRI brain tumor segmentation using autoencoder regularization + `_. + The module does not include the variational autoencoder (VAE). + The model supports 2D or 3D inputs. + + Args: + spatial_dims: spatial dimension of the input data. Defaults to 3. + init_filters: number of output channels for initial convolution layer. Defaults to 8. + in_channels: number of input channels for the network. Defaults to 1. + out_channels: number of output channels for the network. Defaults to 2. + dropout_prob: probability of an element to be zero-ed. Defaults to ``None``. + act: activation type and arguments. Defaults to ``RELU``. + norm: feature normalization type and arguments. Defaults to ``GROUP``. + norm_name: deprecating option for feature normalization type. + num_groups: deprecating option for group norm. parameters. + use_conv_final: if add a final convolution block to output. Defaults to ``True``. + blocks_down: number of down sample blocks in each layer. Defaults to ``[1,2,2,4]``. + blocks_up: number of up sample blocks in each layer. Defaults to ``[1,1,1]``. + upsample_mode: [``"deconv"``, ``"nontrainable"``, ``"pixelshuffle"``] + The mode of upsampling manipulations. + Using the ``nontrainable`` modes cannot guarantee the model's reproducibility. Defaults to``nontrainable``. + + - ``deconv``, uses transposed convolution layers. + - ``nontrainable``, uses non-trainable `linear` interpolation. + - ``pixelshuffle``, uses :py:class:`monai.networks.blocks.SubpixelUpsample`. 
+ + """ + + def __init__( + self, + spatial_dims: int = 3, + init_filters: int = 8, + in_channels: int = 1, + out_channels: int = 2, + dropout_prob: Optional[float] = None, + act: Union[Tuple, str] = ("RELU", {"inplace": True}), + norm: Union[Tuple, str] = ("GROUP", {"num_groups": 8}), + norm_name: str = "", + num_groups: int = 8, + use_conv_final: bool = True, + blocks_down: tuple = (1, 2, 2, 4), + blocks_up: tuple = (1, 1, 1), + upsample_mode: Union[UpsampleMode, str] = UpsampleMode.NONTRAINABLE, + ): + super().__init__() + + if spatial_dims not in (2, 3): + raise ValueError("`spatial_dims` can only be 2 or 3.") + + self.spatial_dims = spatial_dims + self.init_filters = init_filters + self.in_channels = in_channels + self.blocks_down = blocks_down + self.blocks_up = blocks_up + self.dropout_prob = dropout_prob + self.act = act # input options + self.act_mod = get_act_layer(act) + if norm_name: + if norm_name.lower() != "group": + raise ValueError(f"Deprecating option 'norm_name={norm_name}', please use 'norm' instead.") + norm = ("group", {"num_groups": num_groups}) + self.norm = norm + self.upsample_mode = UpsampleMode(upsample_mode) + self.use_conv_final = use_conv_final + self.convInit = get_conv_layer(spatial_dims, in_channels, init_filters) + self.down_layers = self._make_down_layers() + self.up_layers, self.up_samples = self._make_up_layers() + self.conv_final = self._make_final_conv(out_channels) + + if dropout_prob is not None: + self.dropout = Dropout[Dropout.DROPOUT, spatial_dims](dropout_prob) + + def _make_down_layers(self): + down_layers = nn.ModuleList() + blocks_down, spatial_dims, filters, norm = (self.blocks_down, self.spatial_dims, self.init_filters, self.norm) + for i in range(len(blocks_down)): + layer_in_channels = filters * 2**i + pre_conv = ( + get_conv_layer(spatial_dims, layer_in_channels // 2, layer_in_channels, stride=2) + if i > 0 + else nn.Identity() + ) + down_layer = nn.Sequential( + pre_conv, + *[ResBlock(spatial_dims, layer_in_channels, norm=norm, act=self.act) for _ in range(blocks_down[i])], + ) + down_layers.append(down_layer) + return down_layers + + def _make_up_layers(self): + up_layers, up_samples = nn.ModuleList(), nn.ModuleList() + upsample_mode, blocks_up, spatial_dims, filters, norm = ( + self.upsample_mode, + self.blocks_up, + self.spatial_dims, + self.init_filters, + self.norm, + ) + n_up = len(blocks_up) + for i in range(n_up): + sample_in_channels = filters * 2 ** (n_up - i) + up_layers.append( + nn.Sequential( + *[ + ResBlock(spatial_dims, sample_in_channels // 2, norm=norm, act=self.act) + for _ in range(blocks_up[i]) + ] + ) + ) + up_samples.append( + nn.Sequential( + *[ + get_conv_layer(spatial_dims, sample_in_channels, sample_in_channels // 2, kernel_size=1), + get_upsample_layer(spatial_dims, sample_in_channels // 2, upsample_mode=upsample_mode), + ] + ) + ) + return up_layers, up_samples + + def _make_final_conv(self, out_channels: int): + return nn.Sequential( + get_norm_layer(name=self.norm, spatial_dims=self.spatial_dims, channels=self.init_filters), + self.act_mod, + get_conv_layer(self.spatial_dims, self.init_filters, out_channels, kernel_size=1, bias=True), + ) + + def encode(self, x: torch.Tensor) -> Tuple[torch.Tensor, List[torch.Tensor]]: + x = self.convInit(x) + if self.dropout_prob is not None: + x = self.dropout(x) + + down_x = [] + + for down in self.down_layers: + x = down(x) + down_x.append(x) + + return x, down_x + + def decode(self, x: torch.Tensor, down_x: List[torch.Tensor]) -> torch.Tensor: + for i, (up, upl) in 
enumerate(zip(self.up_samples, self.up_layers)): + x = up(x) + down_x[i + 1] + x = upl(x) + + if self.use_conv_final: + x = self.conv_final(x) + + return x + + def forward(self, x: torch.Tensor) -> torch.Tensor: + x, down_x = self.encode(x) + down_x.reverse() + + x = self.decode(x, down_x) + return x + + +class SegResNetVAE(SegResNet): + """ + SegResNetVAE based on `3D MRI brain tumor segmentation using autoencoder regularization + `_. + The module contains the variational autoencoder (VAE). + The model supports 2D or 3D inputs. + + Args: + input_image_size: the size of images to input into the network. It is used to + determine the in_features of the fc layer in VAE. + vae_estimate_std: whether to estimate the standard deviations in VAE. Defaults to ``False``. + vae_default_std: if not to estimate the std, use the default value. Defaults to 0.3. + vae_nz: number of latent variables in VAE. Defaults to 256. + Where, 128 to represent mean, and 128 to represent std. + spatial_dims: spatial dimension of the input data. Defaults to 3. + init_filters: number of output channels for initial convolution layer. Defaults to 8. + in_channels: number of input channels for the network. Defaults to 1. + out_channels: number of output channels for the network. Defaults to 2. + dropout_prob: probability of an element to be zero-ed. Defaults to ``None``. + act: activation type and arguments. Defaults to ``RELU``. + norm: feature normalization type and arguments. Defaults to ``GROUP``. + use_conv_final: if add a final convolution block to output. Defaults to ``True``. + blocks_down: number of down sample blocks in each layer. Defaults to ``[1,2,2,4]``. + blocks_up: number of up sample blocks in each layer. Defaults to ``[1,1,1]``. + upsample_mode: [``"deconv"``, ``"nontrainable"``, ``"pixelshuffle"``] + The mode of upsampling manipulations. + Using the ``nontrainable`` modes cannot guarantee the model's reproducibility. Defaults to``nontrainable``. + + - ``deconv``, uses transposed convolution layers. + - ``nontrainable``, uses non-trainable `linear` interpolation. + - ``pixelshuffle``, uses :py:class:`monai.networks.blocks.SubpixelUpsample`. 
+ """ + + def __init__( + self, + input_image_size: Sequence[int], + vae_estimate_std: bool = False, + vae_default_std: float = 0.3, + vae_nz: int = 256, + spatial_dims: int = 3, + init_filters: int = 8, + in_channels: int = 1, + out_channels: int = 2, + dropout_prob: Optional[float] = None, + act: Union[str, tuple] = ("RELU", {"inplace": True}), + norm: Union[Tuple, str] = ("GROUP", {"num_groups": 8}), + use_conv_final: bool = True, + blocks_down: tuple = (1, 2, 2, 4), + blocks_up: tuple = (1, 1, 1), + upsample_mode: Union[UpsampleMode, str] = UpsampleMode.NONTRAINABLE, + ): + super().__init__( + spatial_dims=spatial_dims, + init_filters=init_filters, + in_channels=in_channels, + out_channels=out_channels, + dropout_prob=dropout_prob, + act=act, + norm=norm, + use_conv_final=use_conv_final, + blocks_down=blocks_down, + blocks_up=blocks_up, + upsample_mode=upsample_mode, + ) + + self.input_image_size = input_image_size + self.smallest_filters = 16 + + zoom = 2 ** (len(self.blocks_down) - 1) + self.fc_insize = [s // (2 * zoom) for s in self.input_image_size] + + self.vae_estimate_std = vae_estimate_std + self.vae_default_std = vae_default_std + self.vae_nz = vae_nz + self._prepare_vae_modules() + self.vae_conv_final = self._make_final_conv(in_channels) + + def _prepare_vae_modules(self): + zoom = 2 ** (len(self.blocks_down) - 1) + v_filters = self.init_filters * zoom + total_elements = int(self.smallest_filters * np.prod(self.fc_insize)) + + self.vae_down = nn.Sequential( + get_norm_layer(name=self.norm, spatial_dims=self.spatial_dims, channels=v_filters), + self.act_mod, + get_conv_layer(self.spatial_dims, v_filters, self.smallest_filters, stride=2, bias=True), + get_norm_layer(name=self.norm, spatial_dims=self.spatial_dims, channels=self.smallest_filters), + self.act_mod, + ) + self.vae_fc1 = nn.Linear(total_elements, self.vae_nz) + self.vae_fc2 = nn.Linear(total_elements, self.vae_nz) + self.vae_fc3 = nn.Linear(self.vae_nz, total_elements) + + self.vae_fc_up_sample = nn.Sequential( + get_conv_layer(self.spatial_dims, self.smallest_filters, v_filters, kernel_size=1), + get_upsample_layer(self.spatial_dims, v_filters, upsample_mode=self.upsample_mode), + get_norm_layer(name=self.norm, spatial_dims=self.spatial_dims, channels=v_filters), + self.act_mod, + ) + + def _get_vae_loss(self, net_input: torch.Tensor, vae_input: torch.Tensor): + """ + Args: + net_input: the original input of the network. + vae_input: the input of VAE module, which is also the output of the network's encoder. 
+ """ + x_vae = self.vae_down(vae_input) + x_vae = x_vae.view(-1, self.vae_fc1.in_features) + z_mean = self.vae_fc1(x_vae) + + z_mean_rand = torch.randn_like(z_mean) + z_mean_rand.requires_grad_(False) + + if self.vae_estimate_std: + z_sigma = self.vae_fc2(x_vae) + z_sigma = F.softplus(z_sigma) + vae_reg_loss = 0.5 * torch.mean(z_mean**2 + z_sigma**2 - torch.log(1e-8 + z_sigma**2) - 1) + + x_vae = z_mean + z_sigma * z_mean_rand + else: + z_sigma = self.vae_default_std + vae_reg_loss = torch.mean(z_mean**2) + + x_vae = z_mean + z_sigma * z_mean_rand + + x_vae = self.vae_fc3(x_vae) + x_vae = self.act_mod(x_vae) + x_vae = x_vae.view([-1, self.smallest_filters] + self.fc_insize) + x_vae = self.vae_fc_up_sample(x_vae) + + for up, upl in zip(self.up_samples, self.up_layers): + x_vae = up(x_vae) + x_vae = upl(x_vae) + + x_vae = self.vae_conv_final(x_vae) + vae_mse_loss = F.mse_loss(net_input, x_vae) + vae_loss = vae_reg_loss + vae_mse_loss + return vae_loss + + def forward(self, x): + net_input = x + x, down_x = self.encode(x) + down_x.reverse() + + vae_input = x + x = self.decode(x, down_x) + + if self.training: + vae_loss = self._get_vae_loss(net_input, vae_input) + return x, vae_loss + + return x, None diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/senet.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/senet.py new file mode 100644 index 0000000000000000000000000000000000000000..a85d32ba5a84206b724cfa6f793c7bc98b133481 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/senet.py @@ -0,0 +1,512 @@ +# Copyright (c) MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import re +from collections import OrderedDict +from typing import Any, List, Optional, Sequence, Tuple, Type, Union + +import torch +import torch.nn as nn +from torch.hub import load_state_dict_from_url + +from monai.apps.utils import download_url +from monai.networks.blocks.convolutions import Convolution +from monai.networks.blocks.squeeze_and_excitation import SEBottleneck, SEResNetBottleneck, SEResNeXtBottleneck +from monai.networks.layers.factories import Act, Conv, Dropout, Norm, Pool +from monai.utils.module import look_up_option + +__all__ = [ + "SENet", + "SENet154", + "SEResNet50", + "SEResNet101", + "SEResNet152", + "SEResNeXt50", + "SEResNext101", + "SE_NET_MODELS", +] + + +SE_NET_MODELS = { + "senet154": "http://data.lip6.fr/cadene/pretrainedmodels/senet154-c7b49a05.pth", + "se_resnet50": "http://data.lip6.fr/cadene/pretrainedmodels/se_resnet50-ce0d4300.pth", + "se_resnet101": "http://data.lip6.fr/cadene/pretrainedmodels/se_resnet101-7e38fcc6.pth", + "se_resnet152": "http://data.lip6.fr/cadene/pretrainedmodels/se_resnet152-d17c99b7.pth", + "se_resnext50_32x4d": "http://data.lip6.fr/cadene/pretrainedmodels/se_resnext50_32x4d-a260b3a4.pth", + "se_resnext101_32x4d": "http://data.lip6.fr/cadene/pretrainedmodels/se_resnext101_32x4d-3b2fe3d8.pth", +} + + +class SENet(nn.Module): + """ + SENet based on `Squeeze-and-Excitation Networks `_. + Adapted from `Cadene Hub 2D version + `_. + + Args: + spatial_dims: spatial dimension of the input data. + in_channels: channel number of the input data. + block: SEBlock class. + for SENet154: SEBottleneck + for SE-ResNet models: SEResNetBottleneck + for SE-ResNeXt models: SEResNeXtBottleneck + layers: number of residual blocks for 4 layers of the network (layer1...layer4). + groups: number of groups for the 3x3 convolution in each bottleneck block. + for SENet154: 64 + for SE-ResNet models: 1 + for SE-ResNeXt models: 32 + reduction: reduction ratio for Squeeze-and-Excitation modules. + for all models: 16 + dropout_prob: drop probability for the Dropout layer. + if `None` the Dropout layer is not used. + for SENet154: 0.2 + for SE-ResNet models: None + for SE-ResNeXt models: None + dropout_dim: determine the dimensions of dropout. Defaults to 1. + When dropout_dim = 1, randomly zeroes some of the elements for each channel. + When dropout_dim = 2, Randomly zeroes out entire channels (a channel is a 2D feature map). + When dropout_dim = 3, Randomly zeroes out entire channels (a channel is a 3D feature map). + inplanes: number of input channels for layer1. + for SENet154: 128 + for SE-ResNet models: 64 + for SE-ResNeXt models: 64 + downsample_kernel_size: kernel size for downsampling convolutions in layer2, layer3 and layer4. + for SENet154: 3 + for SE-ResNet models: 1 + for SE-ResNeXt models: 1 + input_3x3: If `True`, use three 3x3 convolutions instead of + a single 7x7 convolution in layer0. + - For SENet154: True + - For SE-ResNet models: False + - For SE-ResNeXt models: False + num_classes: number of outputs in `last_linear` layer. 
+ for all models: 1000 + """ + + def __init__( + self, + spatial_dims: int, + in_channels: int, + block: Type[Union[SEBottleneck, SEResNetBottleneck, SEResNeXtBottleneck]], + layers: Sequence[int], + groups: int, + reduction: int, + dropout_prob: Optional[float] = 0.2, + dropout_dim: int = 1, + inplanes: int = 128, + downsample_kernel_size: int = 3, + input_3x3: bool = True, + num_classes: int = 1000, + ) -> None: + + super().__init__() + + relu_type: Type[nn.ReLU] = Act[Act.RELU] + conv_type: Type[Union[nn.Conv1d, nn.Conv2d, nn.Conv3d]] = Conv[Conv.CONV, spatial_dims] + pool_type: Type[Union[nn.MaxPool1d, nn.MaxPool2d, nn.MaxPool3d]] = Pool[Pool.MAX, spatial_dims] + norm_type: Type[Union[nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d]] = Norm[Norm.BATCH, spatial_dims] + dropout_type: Type[Union[nn.Dropout, nn.Dropout2d, nn.Dropout3d]] = Dropout[Dropout.DROPOUT, dropout_dim] + avg_pool_type: Type[Union[nn.AdaptiveAvgPool1d, nn.AdaptiveAvgPool2d, nn.AdaptiveAvgPool3d]] = Pool[ + Pool.ADAPTIVEAVG, spatial_dims + ] + + self.inplanes = inplanes + self.spatial_dims = spatial_dims + + layer0_modules: List[Tuple[str, Any]] + + if input_3x3: + layer0_modules = [ + ( + "conv1", + conv_type(in_channels=in_channels, out_channels=64, kernel_size=3, stride=2, padding=1, bias=False), + ), + ("bn1", norm_type(num_features=64)), + ("relu1", relu_type(inplace=True)), + ("conv2", conv_type(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1, bias=False)), + ("bn2", norm_type(num_features=64)), + ("relu2", relu_type(inplace=True)), + ( + "conv3", + conv_type(in_channels=64, out_channels=inplanes, kernel_size=3, stride=1, padding=1, bias=False), + ), + ("bn3", norm_type(num_features=inplanes)), + ("relu3", relu_type(inplace=True)), + ] + else: + layer0_modules = [ + ( + "conv1", + conv_type( + in_channels=in_channels, out_channels=inplanes, kernel_size=7, stride=2, padding=3, bias=False + ), + ), + ("bn1", norm_type(num_features=inplanes)), + ("relu1", relu_type(inplace=True)), + ] + + layer0_modules.append(("pool", pool_type(kernel_size=3, stride=2, ceil_mode=True))) + self.layer0 = nn.Sequential(OrderedDict(layer0_modules)) + self.layer1 = self._make_layer( + block, planes=64, blocks=layers[0], groups=groups, reduction=reduction, downsample_kernel_size=1 + ) + self.layer2 = self._make_layer( + block, + planes=128, + blocks=layers[1], + stride=2, + groups=groups, + reduction=reduction, + downsample_kernel_size=downsample_kernel_size, + ) + self.layer3 = self._make_layer( + block, + planes=256, + blocks=layers[2], + stride=2, + groups=groups, + reduction=reduction, + downsample_kernel_size=downsample_kernel_size, + ) + self.layer4 = self._make_layer( + block, + planes=512, + blocks=layers[3], + stride=2, + groups=groups, + reduction=reduction, + downsample_kernel_size=downsample_kernel_size, + ) + self.adaptive_avg_pool = avg_pool_type(1) + self.dropout = dropout_type(dropout_prob) if dropout_prob is not None else None + self.last_linear = nn.Linear(512 * block.expansion, num_classes) + + for m in self.modules(): + if isinstance(m, conv_type): + nn.init.kaiming_normal_(torch.as_tensor(m.weight)) + elif isinstance(m, norm_type): + nn.init.constant_(torch.as_tensor(m.weight), 1) + nn.init.constant_(torch.as_tensor(m.bias), 0) + elif isinstance(m, nn.Linear): + nn.init.constant_(torch.as_tensor(m.bias), 0) + + def _make_layer( + self, + block: Type[Union[SEBottleneck, SEResNetBottleneck, SEResNeXtBottleneck]], + planes: int, + blocks: int, + groups: int, + reduction: int, + stride: int = 1, + 
downsample_kernel_size: int = 1, + ) -> nn.Sequential: + + downsample = None + if stride != 1 or self.inplanes != planes * block.expansion: + downsample = Convolution( + spatial_dims=self.spatial_dims, + in_channels=self.inplanes, + out_channels=planes * block.expansion, + strides=stride, + kernel_size=downsample_kernel_size, + act=None, + norm=Norm.BATCH, + bias=False, + ) + + layers = [] + layers.append( + block( + spatial_dims=self.spatial_dims, + inplanes=self.inplanes, + planes=planes, + groups=groups, + reduction=reduction, + stride=stride, + downsample=downsample, + ) + ) + self.inplanes = planes * block.expansion + for _num in range(1, blocks): + layers.append( + block( + spatial_dims=self.spatial_dims, + inplanes=self.inplanes, + planes=planes, + groups=groups, + reduction=reduction, + ) + ) + + return nn.Sequential(*layers) + + def features(self, x: torch.Tensor): + x = self.layer0(x) + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + return x + + def logits(self, x: torch.Tensor): + x = self.adaptive_avg_pool(x) + if self.dropout is not None: + x = self.dropout(x) + x = torch.flatten(x, 1) + x = self.last_linear(x) + return x + + def forward(self, x: torch.Tensor) -> torch.Tensor: + x = self.features(x) + x = self.logits(x) + return x + + +def _load_state_dict(model: nn.Module, arch: str, progress: bool): + """ + This function is used to load pretrained models. + """ + model_url = look_up_option(arch, SE_NET_MODELS, None) + if model_url is None: + raise ValueError( + "only 'senet154', 'se_resnet50', 'se_resnet101', 'se_resnet152', 'se_resnext50_32x4d', " + + "and se_resnext101_32x4d are supported to load pretrained weights." + ) + + pattern_conv = re.compile(r"^(layer[1-4]\.\d\.(?:conv)\d\.)(\w*)$") + pattern_bn = re.compile(r"^(layer[1-4]\.\d\.)(?:bn)(\d\.)(\w*)$") + pattern_se = re.compile(r"^(layer[1-4]\.\d\.)(?:se_module.fc1.)(\w*)$") + pattern_se2 = re.compile(r"^(layer[1-4]\.\d\.)(?:se_module.fc2.)(\w*)$") + pattern_down_conv = re.compile(r"^(layer[1-4]\.\d\.)(?:downsample.0.)(\w*)$") + pattern_down_bn = re.compile(r"^(layer[1-4]\.\d\.)(?:downsample.1.)(\w*)$") + + if isinstance(model_url, dict): + download_url(model_url["url"], filepath=model_url["filename"]) + state_dict = torch.load(model_url["filename"], map_location=None) + else: + state_dict = load_state_dict_from_url(model_url, progress=progress) + for key in list(state_dict.keys()): + new_key = None + if pattern_conv.match(key): + new_key = re.sub(pattern_conv, r"\1conv.\2", key) + elif pattern_bn.match(key): + new_key = re.sub(pattern_bn, r"\1conv\2adn.N.\3", key) + elif pattern_se.match(key): + state_dict[key] = state_dict[key].squeeze() + new_key = re.sub(pattern_se, r"\1se_layer.fc.0.\2", key) + elif pattern_se2.match(key): + state_dict[key] = state_dict[key].squeeze() + new_key = re.sub(pattern_se2, r"\1se_layer.fc.2.\2", key) + elif pattern_down_conv.match(key): + new_key = re.sub(pattern_down_conv, r"\1project.conv.\2", key) + elif pattern_down_bn.match(key): + new_key = re.sub(pattern_down_bn, r"\1project.adn.N.\2", key) + if new_key: + state_dict[new_key] = state_dict[key] + del state_dict[key] + + model_dict = model.state_dict() + state_dict = { + k: v for k, v in state_dict.items() if (k in model_dict) and (model_dict[k].shape == state_dict[k].shape) + } + model_dict.update(state_dict) + model.load_state_dict(model_dict) + + +class SENet154(SENet): + """SENet154 based on `Squeeze-and-Excitation Networks` with optional pretrained support when spatial_dims is 2.""" + + 
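+ # hypothetical usage: a 2D ImageNet-style classifier with pretrained weights can be created as
+ # `SENet154(spatial_dims=2, in_channels=3, num_classes=1000, pretrained=True)`; the checkpoint
+ # URL comes from SE_NET_MODELS above and its keys are remapped by `_load_state_dict`.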
def __init__( + self, + layers: Sequence[int] = (3, 8, 36, 3), + groups: int = 64, + reduction: int = 16, + pretrained: bool = False, + progress: bool = True, + **kwargs, + ) -> None: + super().__init__(block=SEBottleneck, layers=layers, groups=groups, reduction=reduction, **kwargs) + if pretrained: + # it only worked when `spatial_dims` is 2 + _load_state_dict(self, "senet154", progress) + + +class SEResNet50(SENet): + """SEResNet50 based on `Squeeze-and-Excitation Networks` with optional pretrained support when spatial_dims is 2.""" + + def __init__( + self, + layers: Sequence[int] = (3, 4, 6, 3), + groups: int = 1, + reduction: int = 16, + dropout_prob: Optional[float] = None, + inplanes: int = 64, + downsample_kernel_size: int = 1, + input_3x3: bool = False, + pretrained: bool = False, + progress: bool = True, + **kwargs, + ) -> None: + super().__init__( + block=SEResNetBottleneck, + layers=layers, + groups=groups, + reduction=reduction, + dropout_prob=dropout_prob, + inplanes=inplanes, + downsample_kernel_size=downsample_kernel_size, + input_3x3=input_3x3, + **kwargs, + ) + if pretrained: + # it only worked when `spatial_dims` is 2 + _load_state_dict(self, "se_resnet50", progress) + + +class SEResNet101(SENet): + """ + SEResNet101 based on `Squeeze-and-Excitation Networks` with optional pretrained support when spatial_dims is 2. + """ + + def __init__( + self, + layers: Sequence[int] = (3, 4, 23, 3), + groups: int = 1, + reduction: int = 16, + inplanes: int = 64, + downsample_kernel_size: int = 1, + input_3x3: bool = False, + pretrained: bool = False, + progress: bool = True, + **kwargs, + ) -> None: + super().__init__( + block=SEResNetBottleneck, + layers=layers, + groups=groups, + reduction=reduction, + inplanes=inplanes, + downsample_kernel_size=downsample_kernel_size, + input_3x3=input_3x3, + **kwargs, + ) + if pretrained: + # it only worked when `spatial_dims` is 2 + _load_state_dict(self, "se_resnet101", progress) + + +class SEResNet152(SENet): + """ + SEResNet152 based on `Squeeze-and-Excitation Networks` with optional pretrained support when spatial_dims is 2. + """ + + def __init__( + self, + layers: Sequence[int] = (3, 8, 36, 3), + groups: int = 1, + reduction: int = 16, + inplanes: int = 64, + downsample_kernel_size: int = 1, + input_3x3: bool = False, + pretrained: bool = False, + progress: bool = True, + **kwargs, + ) -> None: + super().__init__( + block=SEResNetBottleneck, + layers=layers, + groups=groups, + reduction=reduction, + inplanes=inplanes, + downsample_kernel_size=downsample_kernel_size, + input_3x3=input_3x3, + **kwargs, + ) + if pretrained: + # it only worked when `spatial_dims` is 2 + _load_state_dict(self, "se_resnet152", progress) + + +class SEResNext50(SENet): + """ + SEResNext50 based on `Squeeze-and-Excitation Networks` with optional pretrained support when spatial_dims is 2. 
+ """ + + def __init__( + self, + layers: Sequence[int] = (3, 4, 6, 3), + groups: int = 32, + reduction: int = 16, + dropout_prob: Optional[float] = None, + inplanes: int = 64, + downsample_kernel_size: int = 1, + input_3x3: bool = False, + pretrained: bool = False, + progress: bool = True, + **kwargs, + ) -> None: + super().__init__( + block=SEResNeXtBottleneck, + layers=layers, + groups=groups, + dropout_prob=dropout_prob, + reduction=reduction, + inplanes=inplanes, + downsample_kernel_size=downsample_kernel_size, + input_3x3=input_3x3, + **kwargs, + ) + if pretrained: + # it only worked when `spatial_dims` is 2 + _load_state_dict(self, "se_resnext50_32x4d", progress) + + +class SEResNext101(SENet): + """ + SEResNext101 based on `Squeeze-and-Excitation Networks` with optional pretrained support when spatial_dims is 2. + """ + + def __init__( + self, + layers: Sequence[int] = (3, 4, 23, 3), + groups: int = 32, + reduction: int = 16, + dropout_prob: Optional[float] = None, + inplanes: int = 64, + downsample_kernel_size: int = 1, + input_3x3: bool = False, + pretrained: bool = False, + progress: bool = True, + **kwargs, + ) -> None: + super().__init__( + block=SEResNeXtBottleneck, + layers=layers, + groups=groups, + dropout_prob=dropout_prob, + reduction=reduction, + inplanes=inplanes, + downsample_kernel_size=downsample_kernel_size, + input_3x3=input_3x3, + **kwargs, + ) + if pretrained: + # it only worked when `spatial_dims` is 2 + _load_state_dict(self, "se_resnext101_32x4d", progress) + + +SEnet = Senet = SENet +SEnet154 = Senet154 = senet154 = SENet154 +SEresnet50 = Seresnet50 = seresnet50 = SEResNet50 +SEresnet101 = Seresnet101 = seresnet101 = SEResNet101 +SEresnet152 = Seresnet152 = seresnet152 = SEResNet152 +SEResNeXt50 = SEresnext50 = Seresnext50 = seresnext50 = SEResNext50 +SEResNeXt101 = SEresnext101 = Seresnext101 = seresnext101 = SEResNext101 diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/torchvision_fc.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/torchvision_fc.py new file mode 100644 index 0000000000000000000000000000000000000000..ddfee6e0416cf48230ea3b6d4d8a793f0ff6d3a3 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/torchvision_fc.py @@ -0,0 +1,74 @@ +# Copyright (c) MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Any, Dict, Optional, Tuple + +from monai.networks.nets import NetAdapter +from monai.utils import deprecated_arg, optional_import + +models, _ = optional_import("torchvision.models") + + +__all__ = ["TorchVisionFCModel"] + + +class TorchVisionFCModel(NetAdapter): + """ + Customize the fully connected layer of TorchVision model or replace it by convolutional layer. + + Args: + model_name: name of any torchvision model with fully connected layer at the end. 
+ ``resnet18`` (default), ``resnet34``, ``resnet50``, ``resnet101``, ``resnet152``, + ``resnext50_32x4d``, ``resnext101_32x8d``, ``wide_resnet50_2``, ``wide_resnet101_2``. + model details: https://pytorch.org/vision/stable/models.html. + num_classes: number of classes for the last classification layer. Default to 1. + dim: number of supported spatial dimensions in the specified model, depends on the model implementation. + default to 2 as most Torchvision models are for 2D image processing. + in_channels: number of the input channels of last layer. if None, get it from `in_features` of last layer. + use_conv: whether use convolutional layer to replace the last layer, default to False. + pool: parameters for the pooling layer, it should be a tuple, the first item is name of the pooling layer, + the second item is dictionary of the initialization args. if None, will not replace the `layers[-2]`. + default to `("avg", {"kernel_size": 7, "stride": 1})`. + bias: the bias value when replacing the last layer. if False, the layer will not learn an additive bias, + default to True. + pretrained: whether to use the imagenet pretrained weights. Default to False. + """ + + @deprecated_arg("n_classes", since="0.6") + def __init__( + self, + model_name: str = "resnet18", + num_classes: int = 1, + dim: int = 2, + in_channels: Optional[int] = None, + use_conv: bool = False, + pool: Optional[Tuple[str, Dict[str, Any]]] = ("avg", {"kernel_size": 7, "stride": 1}), + bias: bool = True, + pretrained: bool = False, + n_classes: Optional[int] = None, + ): + # in case the new num_classes is default but you still call deprecated n_classes + if n_classes is not None and num_classes == 1: + num_classes = n_classes + model = getattr(models, model_name)(pretrained=pretrained) + # check if the model is compatible, should have a FC layer at the end + if not str(list(model.children())[-1]).startswith("Linear"): + raise ValueError(f"Model ['{model_name}'] does not have a Linear layer at the end.") + + super().__init__( + model=model, + num_classes=num_classes, + dim=dim, + in_channels=in_channels, + use_conv=use_conv, + pool=pool, + bias=bias, + ) diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/transchex.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/transchex.py new file mode 100644 index 0000000000000000000000000000000000000000..b03ff5a17d2bb8738575624e06144e1d3519957b --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/transchex.py @@ -0,0 +1,378 @@ +# Copyright (c) MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
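A minimal, hypothetical sketch for the adapter defined in torchvision_fc.py above (the import path mirrors the file location; torchvision must be installed, and the 224 x 224 input matches the default 7 x 7 average-pool head):

    import torch
    from monai.networks.nets.torchvision_fc import TorchVisionFCModel

    # wraps a torchvision resnet18 and replaces its final Linear layer with a single-output head
    net = TorchVisionFCModel(model_name="resnet18", num_classes=1, pretrained=False)
    out = net(torch.rand(2, 3, 224, 224))  # (2, 1)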
+ +import math +import os +import shutil +import tarfile +import tempfile +from typing import Sequence, Tuple, Union + +import torch +from torch import nn + +from monai.utils import optional_import + +transformers = optional_import("transformers") +load_tf_weights_in_bert = optional_import("transformers", name="load_tf_weights_in_bert") +cached_path = optional_import("transformers.file_utils", name="cached_path")[0] +BertEmbeddings = optional_import("transformers.models.bert.modeling_bert", name="BertEmbeddings")[0] +BertLayer = optional_import("transformers.models.bert.modeling_bert", name="BertLayer")[0] + +__all__ = ["BertPreTrainedModel", "BertAttention", "BertOutput", "BertMixedLayer", "Pooler", "MultiModal", "Transchex"] + + +class BertPreTrainedModel(nn.Module): + """Module to load BERT pre-trained weights. + Based on: + LXMERT + https://github.com/airsplay/lxmert + BERT (pytorch-transformer) + https://github.com/huggingface/transformers + """ + + def __init__(self, *inputs, **kwargs) -> None: + super().__init__() + + def init_bert_weights(self, module): + if isinstance(module, (nn.Linear, nn.Embedding)): + module.weight.data.normal_(mean=0.0, std=self.config.initializer_range) + elif isinstance(module, torch.nn.LayerNorm): + module.bias.data.zero_() + module.weight.data.fill_(1.0) + if isinstance(module, nn.Linear) and module.bias is not None: + module.bias.data.zero_() + + @classmethod + def from_pretrained( + cls, + num_language_layers, + num_vision_layers, + num_mixed_layers, + bert_config, + state_dict=None, + cache_dir=None, + from_tf=False, + *inputs, + **kwargs, + ): + archive_file = "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased.tar.gz" + resolved_archive_file = cached_path(archive_file, cache_dir=cache_dir) + tempdir = None + if os.path.isdir(resolved_archive_file) or from_tf: + serialization_dir = resolved_archive_file + else: + tempdir = tempfile.mkdtemp() + with tarfile.open(resolved_archive_file, "r:gz") as archive: + archive.extractall(tempdir) + serialization_dir = tempdir + model = cls(num_language_layers, num_vision_layers, num_mixed_layers, bert_config, *inputs, **kwargs) + if state_dict is None and not from_tf: + weights_path = os.path.join(serialization_dir, "pytorch_model.bin") + state_dict = torch.load(weights_path, map_location="cpu" if not torch.cuda.is_available() else None) + if tempdir: + shutil.rmtree(tempdir) + if from_tf: + weights_path = os.path.join(serialization_dir, "model.ckpt") + return load_tf_weights_in_bert(model, weights_path) + old_keys = [] + new_keys = [] + for key in state_dict.keys(): + new_key = None + if "gamma" in key: + new_key = key.replace("gamma", "weight") + if "beta" in key: + new_key = key.replace("beta", "bias") + if new_key: + old_keys.append(key) + new_keys.append(new_key) + for old_key, new_key in zip(old_keys, new_keys): + state_dict[new_key] = state_dict.pop(old_key) + missing_keys = [] + unexpected_keys = [] + error_msgs = [] + metadata = getattr(state_dict, "_metadata", None) + state_dict = state_dict.copy() + if metadata is not None: + state_dict._metadata = metadata + + def load(module, prefix=""): + local_metadata = {} if metadata is None else metadata.get(prefix[:-1], {}) + module._load_from_state_dict( + state_dict, prefix, local_metadata, True, missing_keys, unexpected_keys, error_msgs + ) + for name, child in module._modules.items(): + if child is not None: + load(child, prefix + name + ".") + + start_prefix = "" + if not hasattr(model, "bert") and any(s.startswith("bert.") for s in 
state_dict.keys()): + start_prefix = "bert." + load(model, prefix=start_prefix) + return model + + +class BertAttention(nn.Module): + """BERT attention layer. + Based on: BERT (pytorch-transformer) + https://github.com/huggingface/transformers + """ + + def __init__(self, config) -> None: + super().__init__() + self.num_attention_heads = config.num_attention_heads + self.attention_head_size = int(config.hidden_size / config.num_attention_heads) + self.all_head_size = self.num_attention_heads * self.attention_head_size + self.query = nn.Linear(config.hidden_size, self.all_head_size) + self.key = nn.Linear(config.hidden_size, self.all_head_size) + self.value = nn.Linear(config.hidden_size, self.all_head_size) + self.dropout = nn.Dropout(config.attention_probs_dropout_prob) + + def transpose_for_scores(self, x): + new_x_shape = x.size()[:-1] + (self.num_attention_heads, self.attention_head_size) + x = x.view(*new_x_shape) + return x.permute(0, 2, 1, 3) + + def forward(self, hidden_states, context): + mixed_query_layer = self.query(hidden_states) + mixed_key_layer = self.key(context) + mixed_value_layer = self.value(context) + query_layer = self.transpose_for_scores(mixed_query_layer) + key_layer = self.transpose_for_scores(mixed_key_layer) + value_layer = self.transpose_for_scores(mixed_value_layer) + attention_scores = torch.matmul(query_layer, key_layer.transpose(-1, -2)) + attention_scores = attention_scores / math.sqrt(self.attention_head_size) + attention_probs = self.dropout(nn.Softmax(dim=-1)(attention_scores)) + context_layer = torch.matmul(attention_probs, value_layer) + context_layer = context_layer.permute(0, 2, 1, 3).contiguous() + new_context_layer_shape = context_layer.size()[:-2] + (self.all_head_size,) + context_layer = context_layer.view(*new_context_layer_shape) + return context_layer + + +class BertOutput(nn.Module): + """BERT output layer. + Based on: BERT (pytorch-transformer) + https://github.com/huggingface/transformers + """ + + def __init__(self, config) -> None: + super().__init__() + self.dense = nn.Linear(config.hidden_size, config.hidden_size) + self.LayerNorm = torch.nn.LayerNorm(config.hidden_size, eps=1e-12) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + + def forward(self, hidden_states, input_tensor): + hidden_states = self.dense(hidden_states) + hidden_states = self.dropout(hidden_states) + hidden_states = self.LayerNorm(hidden_states + input_tensor) + return hidden_states + + +class BertMixedLayer(nn.Module): + """BERT cross attention layer. + Based on: BERT (pytorch-transformer) + https://github.com/huggingface/transformers + """ + + def __init__(self, config) -> None: + super().__init__() + self.att_x = BertAttention(config) + self.output_x = BertOutput(config) + self.att_y = BertAttention(config) + self.output_y = BertOutput(config) + + def forward(self, x, y): + output_x = self.att_x(x, y) + output_y = self.att_y(y, x) + return self.output_x(output_x, x), self.output_y(output_y, y) + + +class Pooler(nn.Module): + """BERT pooler layer. 
+ Based on: BERT (pytorch-transformer) + https://github.com/huggingface/transformers + """ + + def __init__(self, hidden_size) -> None: + super().__init__() + self.dense = nn.Linear(hidden_size, hidden_size) + self.activation = nn.Tanh() + + def forward(self, hidden_states): + first_token_tensor = hidden_states[:, 0] + pooled_output = self.dense(first_token_tensor) + pooled_output = self.activation(pooled_output) + return pooled_output + + +class MultiModal(BertPreTrainedModel): + """ + Multimodal Transformers From Pretrained BERT Weights" + """ + + def __init__( + self, num_language_layers: int, num_vision_layers: int, num_mixed_layers: int, bert_config: dict + ) -> None: + """ + Args: + num_language_layers: number of language transformer layers. + num_vision_layers: number of vision transformer layers. + bert_config: configuration for bert language transformer encoder. + + """ + super().__init__() + self.config = type("obj", (object,), bert_config) + self.embeddings = BertEmbeddings(self.config) + self.language_encoder = nn.ModuleList([BertLayer(self.config) for _ in range(num_language_layers)]) + self.vision_encoder = nn.ModuleList([BertLayer(self.config) for _ in range(num_vision_layers)]) + self.mixed_encoder = nn.ModuleList([BertMixedLayer(self.config) for _ in range(num_mixed_layers)]) + self.apply(self.init_bert_weights) + + def forward(self, input_ids, token_type_ids=None, vision_feats=None, attention_mask=None): + language_features = self.embeddings(input_ids, token_type_ids) + for layer in self.vision_encoder: + vision_feats = layer(vision_feats, None)[0] + for layer in self.language_encoder: + language_features = layer(language_features, attention_mask)[0] + for layer in self.mixed_encoder: + language_features, vision_feats = layer(language_features, vision_feats) + return language_features, vision_feats + + +class Transchex(torch.nn.Module): + """ + TransChex based on: "Hatamizadeh et al.,TransCheX: Self-Supervised Pretraining of Vision-Language + Transformers for Chest X-ray Analysis" + """ + + def __init__( + self, + in_channels: int, + img_size: Union[Sequence[int], int], + patch_size: Union[int, Tuple[int, int]], + num_classes: int, + num_language_layers: int, + num_vision_layers: int, + num_mixed_layers: int, + hidden_size: int = 768, + drop_out: float = 0.0, + attention_probs_dropout_prob: float = 0.1, + gradient_checkpointing: bool = False, + hidden_act: str = "gelu", + hidden_dropout_prob: float = 0.1, + initializer_range: float = 0.02, + intermediate_size: int = 3072, + layer_norm_eps: float = 1e-12, + max_position_embeddings: int = 512, + model_type: str = "bert", + num_attention_heads: int = 12, + num_hidden_layers: int = 12, + pad_token_id: int = 0, + position_embedding_type: str = "absolute", + transformers_version: str = "4.10.2", + type_vocab_size: int = 2, + use_cache: bool = True, + vocab_size: int = 30522, + chunk_size_feed_forward: int = 0, + is_decoder: bool = False, + add_cross_attention: bool = False, + ) -> None: + """ + Args: + in_channels: dimension of input channels. + img_size: dimension of input image. + patch_size: dimension of patch size. + num_classes: number of classes if classification is used. + num_language_layers: number of language transformer layers. + num_vision_layers: number of vision transformer layers. + num_mixed_layers: number of mixed transformer layers. + drop_out: faction of the input units to drop. + + The other parameters are part of the `bert_config` to `MultiModal.from_pretrained`. + + Examples: + + .. 
code-block:: python + + # for 3-channel with image size of (224,224), patch size of (32,32), 3 classes, 2 language layers, + # 2 vision layers, 2 mixed modality layers and dropout of 0.2 in the classification head + net = Transchex(in_channels=3, + img_size=(224, 224), + num_classes=3, + num_language_layers=2, + num_vision_layers=2, + num_mixed_layers=2, + drop_out=0.2) + + """ + super().__init__() + bert_config = { + "attention_probs_dropout_prob": attention_probs_dropout_prob, + "classifier_dropout": None, + "gradient_checkpointing": gradient_checkpointing, + "hidden_act": hidden_act, + "hidden_dropout_prob": hidden_dropout_prob, + "hidden_size": hidden_size, + "initializer_range": initializer_range, + "intermediate_size": intermediate_size, + "layer_norm_eps": layer_norm_eps, + "max_position_embeddings": max_position_embeddings, + "model_type": model_type, + "num_attention_heads": num_attention_heads, + "num_hidden_layers": num_hidden_layers, + "pad_token_id": pad_token_id, + "position_embedding_type": position_embedding_type, + "transformers_version": transformers_version, + "type_vocab_size": type_vocab_size, + "use_cache": use_cache, + "vocab_size": vocab_size, + "chunk_size_feed_forward": chunk_size_feed_forward, + "is_decoder": is_decoder, + "add_cross_attention": add_cross_attention, + } + if not (0 <= drop_out <= 1): + raise ValueError("dropout_rate should be between 0 and 1.") + + if (img_size[0] % patch_size[0] != 0) or (img_size[1] % patch_size[1] != 0): # type: ignore + raise ValueError("img_size should be divisible by patch_size.") + + self.multimodal = MultiModal.from_pretrained( + num_language_layers=num_language_layers, + num_vision_layers=num_vision_layers, + num_mixed_layers=num_mixed_layers, + bert_config=bert_config, + ) + + self.patch_size = patch_size + self.num_patches = (img_size[0] // self.patch_size[0]) * (img_size[1] // self.patch_size[1]) # type: ignore + self.vision_proj = nn.Conv2d( + in_channels=in_channels, out_channels=hidden_size, kernel_size=self.patch_size, stride=self.patch_size + ) + self.norm_vision_pos = nn.LayerNorm(hidden_size) + self.pos_embed_vis = nn.Parameter(torch.zeros(1, self.num_patches, hidden_size)) + self.pooler = Pooler(hidden_size=hidden_size) + self.drop = torch.nn.Dropout(drop_out) + self.cls_head = torch.nn.Linear(hidden_size, num_classes) + + def forward(self, input_ids, token_type_ids=None, vision_feats=None): + attention_mask = torch.ones_like(input_ids).unsqueeze(1).unsqueeze(2) + attention_mask = attention_mask.to(dtype=next(self.parameters()).dtype) + attention_mask = (1.0 - attention_mask) * -10000.0 + vision_feats = self.vision_proj(vision_feats).flatten(2).transpose(1, 2) + vision_feats = self.norm_vision_pos(vision_feats) + vision_feats = vision_feats + self.pos_embed_vis + hidden_state_lang, hidden_state_vis = self.multimodal( + input_ids=input_ids, token_type_ids=token_type_ids, vision_feats=vision_feats, attention_mask=attention_mask + ) + pooled_features = self.pooler(hidden_state_lang) + logits = self.cls_head(self.drop(pooled_features)) + return logits diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/unet.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/unet.py new file mode 100644 index 0000000000000000000000000000000000000000..faccddee45573331f689a8bb38569e08a0b734c3 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/unet.py @@ -0,0 +1,315 @@ +# Copyright (c) 
MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import warnings +from typing import Optional, Sequence, Tuple, Union + +import torch +import torch.nn as nn + +from monai.networks.blocks.convolutions import Convolution, ResidualUnit +from monai.networks.layers.factories import Act, Norm +from monai.networks.layers.simplelayers import SkipConnection +from monai.utils import alias, deprecated_arg, export + +__all__ = ["UNet", "Unet"] + + +@export("monai.networks.nets") +@alias("Unet") +class UNet(nn.Module): + """ + Enhanced version of UNet which has residual units implemented with the ResidualUnit class. + The residual part uses a convolution to change the input dimensions to match the output dimensions + if this is necessary but will use nn.Identity if not. + Refer to: https://link.springer.com/chapter/10.1007/978-3-030-12029-0_40. + + Each layer of the network has a encode and decode path with a skip connection between them. Data in the encode path + is downsampled using strided convolutions (if `strides` is given values greater than 1) and in the decode path + upsampled using strided transpose convolutions. These down or up sampling operations occur at the beginning of each + block rather than afterwards as is typical in UNet implementations. + + To further explain this consider the first example network given below. This network has 3 layers with strides + of 2 for each of the middle layers (the last layer is the bottom connection which does not down/up sample). Input + data to this network is immediately reduced in the spatial dimensions by a factor of 2 by the first convolution of + the residual unit defining the first layer of the encode part. The last layer of the decode part will upsample its + input (data from the previous layer concatenated with data from the skip connection) in the first convolution. this + ensures the final output of the network has the same shape as the input. + + Padding values for the convolutions are chosen to ensure output sizes are even divisors/multiples of the input + sizes if the `strides` value for a layer is a factor of the input sizes. A typical case is to use `strides` values + of 2 and inputs that are multiples of powers of 2. An input can thus be downsampled evenly however many times its + dimensions can be divided by 2, so for the example network inputs would have to have dimensions that are multiples + of 4. In the second example network given below the input to the bottom layer will have shape (1, 64, 15, 15) for + an input of shape (1, 1, 240, 240) demonstrating the input being reduced in size spatially by 2**4. + + Args: + spatial_dims: number of spatial dimensions. + in_channels: number of input channels. + out_channels: number of output channels. + channels: sequence of channels. Top block first. The length of `channels` should be no less than 2. + strides: sequence of convolution strides. The length of `stride` should equal to `len(channels) - 1`. + kernel_size: convolution kernel size, the value(s) should be odd. 
If sequence, + its length should equal to dimensions. Defaults to 3. + up_kernel_size: upsampling convolution kernel size, the value(s) should be odd. If sequence, + its length should equal to dimensions. Defaults to 3. + num_res_units: number of residual units. Defaults to 0. + act: activation type and arguments. Defaults to PReLU. + norm: feature normalization type and arguments. Defaults to instance norm. + dropout: dropout ratio. Defaults to no dropout. + bias: whether to have a bias term in convolution blocks. Defaults to True. + According to `Performance Tuning Guide `_, + if a conv layer is directly followed by a batch norm layer, bias should be False. + adn_ordering: a string representing the ordering of activation (A), normalization (N), and dropout (D). + Defaults to "NDA". See also: :py:class:`monai.networks.blocks.ADN`. + + Examples:: + + from monai.networks.nets import UNet + + # 3 layer network with down/upsampling by a factor of 2 at each layer with 2-convolution residual units + net = UNet( + spatial_dims=2, + in_channels=1, + out_channels=1, + channels=(4, 8, 16), + strides=(2, 2), + num_res_units=2 + ) + + # 5 layer network with simple convolution/normalization/dropout/activation blocks defining the layers + net=UNet( + spatial_dims=2, + in_channels=1, + out_channels=1, + channels=(4, 8, 16, 32, 64), + strides=(2, 2, 2, 2), + ) + + .. deprecated:: 0.6.0 + ``dimensions`` is deprecated, use ``spatial_dims`` instead. + + Note: The acceptable spatial size of input data depends on the parameters of the network, + to set appropriate spatial size, please check the tutorial for more details: + https://github.com/Project-MONAI/tutorials/blob/master/modules/UNet_input_size_constrains.ipynb. + Typically, when using a stride of 2 in down / up sampling, the output dimensions are either half of the + input when downsampling, or twice when upsampling. In this case with N numbers of layers in the network, + the inputs must have spatial dimensions that are all multiples of 2^N. + Usually, applying `resize`, `pad` or `crop` transforms can help adjust the spatial size of input data. + + """ + + @deprecated_arg( + name="dimensions", new_name="spatial_dims", since="0.6", msg_suffix="Please use `spatial_dims` instead." 
+ ) + def __init__( + self, + spatial_dims: int, + in_channels: int, + out_channels: int, + channels: Sequence[int], + strides: Sequence[int], + kernel_size: Union[Sequence[int], int] = 3, + up_kernel_size: Union[Sequence[int], int] = 3, + num_res_units: int = 0, + act: Union[Tuple, str] = Act.PRELU, + norm: Union[Tuple, str] = Norm.INSTANCE, + dropout: float = 0.0, + bias: bool = True, + adn_ordering: str = "NDA", + dimensions: Optional[int] = None, + ) -> None: + + super().__init__() + + if len(channels) < 2: + raise ValueError("the length of `channels` should be no less than 2.") + delta = len(strides) - (len(channels) - 1) + if delta < 0: + raise ValueError("the length of `strides` should equal to `len(channels) - 1`.") + if delta > 0: + warnings.warn(f"`len(strides) > len(channels) - 1`, the last {delta} values of strides will not be used.") + if dimensions is not None: + spatial_dims = dimensions + if isinstance(kernel_size, Sequence): + if len(kernel_size) != spatial_dims: + raise ValueError("the length of `kernel_size` should equal to `dimensions`.") + if isinstance(up_kernel_size, Sequence): + if len(up_kernel_size) != spatial_dims: + raise ValueError("the length of `up_kernel_size` should equal to `dimensions`.") + + self.dimensions = spatial_dims + self.in_channels = in_channels + self.out_channels = out_channels + self.channels = channels + self.strides = strides + self.kernel_size = kernel_size + self.up_kernel_size = up_kernel_size + self.num_res_units = num_res_units + self.act = act + self.norm = norm + self.dropout = dropout + self.bias = bias + self.adn_ordering = adn_ordering + + def _create_block( + inc: int, outc: int, channels: Sequence[int], strides: Sequence[int], is_top: bool + ) -> nn.Module: + """ + Builds the UNet structure from the bottom up by recursing down to the bottom block, then creating sequential + blocks containing the downsample path, a skip connection around the previous block, and the upsample path. + + Args: + inc: number of input channels. + outc: number of output channels. + channels: sequence of channels. Top block first. + strides: convolution stride. + is_top: True if this is the top block. + """ + c = channels[0] + s = strides[0] + + subblock: nn.Module + + if len(channels) > 2: + subblock = _create_block(c, c, channels[1:], strides[1:], False) # continue recursion down + upc = c * 2 + else: + # the next layer is the bottom so stop recursion, create the bottom layer as the sublock for this layer + subblock = self._get_bottom_layer(c, channels[1]) + upc = c + channels[1] + + down = self._get_down_layer(inc, c, s, is_top) # create layer in downsampling path + up = self._get_up_layer(upc, outc, s, is_top) # create layer in upsampling path + + return self._get_connection_block(down, up, subblock) + + self.model = _create_block(in_channels, out_channels, self.channels, self.strides, True) + + def _get_connection_block(self, down_path: nn.Module, up_path: nn.Module, subblock: nn.Module) -> nn.Module: + """ + Returns the block object defining a layer of the UNet structure including the implementation of the skip + between encoding (down) and and decoding (up) sides of the network. + + Args: + down_path: encoding half of the layer + up_path: decoding half of the layer + subblock: block defining the next layer in the network. 
+ Returns: block for this layer: `nn.Sequential(down_path, SkipConnection(subblock), up_path)` + """ + return nn.Sequential(down_path, SkipConnection(subblock), up_path) + + def _get_down_layer(self, in_channels: int, out_channels: int, strides: int, is_top: bool) -> nn.Module: + """ + Returns the encoding (down) part of a layer of the network. This typically will downsample data at some point + in its structure. Its output is used as input to the next layer down and is concatenated with output from the + next layer to form the input for the decode (up) part of the layer. + + Args: + in_channels: number of input channels. + out_channels: number of output channels. + strides: convolution stride. + is_top: True if this is the top block. + """ + mod: nn.Module + if self.num_res_units > 0: + + mod = ResidualUnit( + self.dimensions, + in_channels, + out_channels, + strides=strides, + kernel_size=self.kernel_size, + subunits=self.num_res_units, + act=self.act, + norm=self.norm, + dropout=self.dropout, + bias=self.bias, + adn_ordering=self.adn_ordering, + ) + return mod + mod = Convolution( + self.dimensions, + in_channels, + out_channels, + strides=strides, + kernel_size=self.kernel_size, + act=self.act, + norm=self.norm, + dropout=self.dropout, + bias=self.bias, + adn_ordering=self.adn_ordering, + ) + return mod + + def _get_bottom_layer(self, in_channels: int, out_channels: int) -> nn.Module: + """ + Returns the bottom or bottleneck layer at the bottom of the network linking encode to decode halves. + + Args: + in_channels: number of input channels. + out_channels: number of output channels. + """ + return self._get_down_layer(in_channels, out_channels, 1, False) + + def _get_up_layer(self, in_channels: int, out_channels: int, strides: int, is_top: bool) -> nn.Module: + """ + Returns the decoding (up) part of a layer of the network. This typically will upsample data at some point + in its structure. Its output is used as input to the next layer up. + + Args: + in_channels: number of input channels. + out_channels: number of output channels. + strides: convolution stride. + is_top: True if this is the top block. 
+ """ + conv: Union[Convolution, nn.Sequential] + + conv = Convolution( + self.dimensions, + in_channels, + out_channels, + strides=strides, + kernel_size=self.up_kernel_size, + act=self.act, + norm=self.norm, + dropout=self.dropout, + bias=self.bias, + conv_only=is_top and self.num_res_units == 0, + is_transposed=True, + adn_ordering=self.adn_ordering, + ) + + if self.num_res_units > 0: + ru = ResidualUnit( + self.dimensions, + out_channels, + out_channels, + strides=1, + kernel_size=self.kernel_size, + subunits=1, + act=self.act, + norm=self.norm, + dropout=self.dropout, + bias=self.bias, + last_conv_only=is_top, + adn_ordering=self.adn_ordering, + ) + conv = nn.Sequential(conv, ru) + + return conv + + def forward(self, x: torch.Tensor) -> torch.Tensor: + x = self.model(x) + return x + + +Unet = UNet diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/unetr.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/unetr.py new file mode 100644 index 0000000000000000000000000000000000000000..c53936d27feccef396a6d8f2ec5c790e2b3ec313 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/unetr.py @@ -0,0 +1,205 @@ +# Copyright (c) MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Sequence, Tuple, Union + +import torch.nn as nn + +from monai.networks.blocks.dynunet_block import UnetOutBlock +from monai.networks.blocks.unetr_block import UnetrBasicBlock, UnetrPrUpBlock, UnetrUpBlock +from monai.networks.nets.vit import ViT +from monai.utils import ensure_tuple_rep + + +class UNETR(nn.Module): + """ + UNETR based on: "Hatamizadeh et al., + UNETR: Transformers for 3D Medical Image Segmentation " + """ + + def __init__( + self, + in_channels: int, + out_channels: int, + img_size: Union[Sequence[int], int], + feature_size: int = 16, + hidden_size: int = 768, + mlp_dim: int = 3072, + num_heads: int = 12, + pos_embed: str = "conv", + norm_name: Union[Tuple, str] = "instance", + conv_block: bool = True, + res_block: bool = True, + dropout_rate: float = 0.0, + spatial_dims: int = 3, + ) -> None: + """ + Args: + in_channels: dimension of input channels. + out_channels: dimension of output channels. + img_size: dimension of input image. + feature_size: dimension of network feature size. + hidden_size: dimension of hidden layer. + mlp_dim: dimension of feedforward layer. + num_heads: number of attention heads. + pos_embed: position embedding layer type. + norm_name: feature normalization type and arguments. + conv_block: bool argument to determine if convolutional block is used. + res_block: bool argument to determine if residual block is used. + dropout_rate: faction of the input units to drop. + spatial_dims: number of spatial dims. 
+ + Examples:: + + # for single channel input 4-channel output with image size of (96,96,96), feature size of 32 and batch norm + >>> net = UNETR(in_channels=1, out_channels=4, img_size=(96,96,96), feature_size=32, norm_name='batch') + + # for single channel input 4-channel output with image size of (96,96), feature size of 32 and batch norm + >>> net = UNETR(in_channels=1, out_channels=4, img_size=96, feature_size=32, norm_name='batch', spatial_dims=2) + + # for 4-channel input 3-channel output with image size of (128,128,128), conv position embedding and instance norm + >>> net = UNETR(in_channels=4, out_channels=3, img_size=(128,128,128), pos_embed='conv', norm_name='instance') + + """ + + super().__init__() + + if not (0 <= dropout_rate <= 1): + raise ValueError("dropout_rate should be between 0 and 1.") + + if hidden_size % num_heads != 0: + raise ValueError("hidden_size should be divisible by num_heads.") + + self.num_layers = 12 + img_size = ensure_tuple_rep(img_size, spatial_dims) + self.patch_size = ensure_tuple_rep(16, spatial_dims) + self.feat_size = tuple(img_d // p_d for img_d, p_d in zip(img_size, self.patch_size)) + self.hidden_size = hidden_size + self.classification = False + self.vit = ViT( + in_channels=in_channels, + img_size=img_size, + patch_size=self.patch_size, + hidden_size=hidden_size, + mlp_dim=mlp_dim, + num_layers=self.num_layers, + num_heads=num_heads, + pos_embed=pos_embed, + classification=self.classification, + dropout_rate=dropout_rate, + spatial_dims=spatial_dims, + ) + self.encoder1 = UnetrBasicBlock( + spatial_dims=spatial_dims, + in_channels=in_channels, + out_channels=feature_size, + kernel_size=3, + stride=1, + norm_name=norm_name, + res_block=res_block, + ) + self.encoder2 = UnetrPrUpBlock( + spatial_dims=spatial_dims, + in_channels=hidden_size, + out_channels=feature_size * 2, + num_layer=2, + kernel_size=3, + stride=1, + upsample_kernel_size=2, + norm_name=norm_name, + conv_block=conv_block, + res_block=res_block, + ) + self.encoder3 = UnetrPrUpBlock( + spatial_dims=spatial_dims, + in_channels=hidden_size, + out_channels=feature_size * 4, + num_layer=1, + kernel_size=3, + stride=1, + upsample_kernel_size=2, + norm_name=norm_name, + conv_block=conv_block, + res_block=res_block, + ) + self.encoder4 = UnetrPrUpBlock( + spatial_dims=spatial_dims, + in_channels=hidden_size, + out_channels=feature_size * 8, + num_layer=0, + kernel_size=3, + stride=1, + upsample_kernel_size=2, + norm_name=norm_name, + conv_block=conv_block, + res_block=res_block, + ) + self.decoder5 = UnetrUpBlock( + spatial_dims=spatial_dims, + in_channels=hidden_size, + out_channels=feature_size * 8, + kernel_size=3, + upsample_kernel_size=2, + norm_name=norm_name, + res_block=res_block, + ) + self.decoder4 = UnetrUpBlock( + spatial_dims=spatial_dims, + in_channels=feature_size * 8, + out_channels=feature_size * 4, + kernel_size=3, + upsample_kernel_size=2, + norm_name=norm_name, + res_block=res_block, + ) + self.decoder3 = UnetrUpBlock( + spatial_dims=spatial_dims, + in_channels=feature_size * 4, + out_channels=feature_size * 2, + kernel_size=3, + upsample_kernel_size=2, + norm_name=norm_name, + res_block=res_block, + ) + self.decoder2 = UnetrUpBlock( + spatial_dims=spatial_dims, + in_channels=feature_size * 2, + out_channels=feature_size, + kernel_size=3, + upsample_kernel_size=2, + norm_name=norm_name, + res_block=res_block, + ) + self.out = UnetOutBlock(spatial_dims=spatial_dims, in_channels=feature_size, out_channels=out_channels) + self.proj_axes = (0, spatial_dims + 1) + tuple(d 
+ 1 for d in range(spatial_dims)) + self.proj_view_shape = list(self.feat_size) + [self.hidden_size] + + def proj_feat(self, x): + new_view = [x.size(0)] + self.proj_view_shape + x = x.view(new_view) + x = x.permute(self.proj_axes).contiguous() + return x + + def forward(self, x_in): + x, hidden_states_out = self.vit(x_in) + enc1 = self.encoder1(x_in) + x2 = hidden_states_out[3] + enc2 = self.encoder2(self.proj_feat(x2)) + x3 = hidden_states_out[6] + enc3 = self.encoder3(self.proj_feat(x3)) + x4 = hidden_states_out[9] + enc4 = self.encoder4(self.proj_feat(x4)) + dec4 = self.proj_feat(x) + dec3 = self.decoder5(dec4, enc4) + dec2 = self.decoder4(dec3, enc3) + dec1 = self.decoder3(dec2, enc2) + out = self.decoder2(dec1, enc1) + return self.out(out) diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/varautoencoder.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/varautoencoder.py new file mode 100644 index 0000000000000000000000000000000000000000..7386883124e4f76e5884d32aa3d47168d3577289 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/varautoencoder.py @@ -0,0 +1,161 @@ +# Copyright (c) MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Optional, Sequence, Tuple, Union + +import numpy as np +import torch +import torch.nn as nn +from torch.nn import functional as F + +from monai.networks.layers.convutils import calculate_out_shape, same_padding +from monai.networks.layers.factories import Act, Norm +from monai.networks.nets import AutoEncoder +from monai.utils import deprecated_arg + +__all__ = ["VarAutoEncoder"] + + +class VarAutoEncoder(AutoEncoder): + """ + Variational Autoencoder based on the paper - https://arxiv.org/abs/1312.6114 + + Args: + spatial_dims: number of spatial dimensions. + in_shape: shape of input data starting with channel dimension. + out_channels: number of output channels. + latent_size: size of the latent variable. + channels: sequence of channels. Top block first. The length of `channels` should be no less than 2. + strides: sequence of convolution strides. The length of `stride` should equal to `len(channels) - 1`. + kernel_size: convolution kernel size, the value(s) should be odd. If sequence, + its length should equal to dimensions. Defaults to 3. + up_kernel_size: upsampling convolution kernel size, the value(s) should be odd. If sequence, + its length should equal to dimensions. Defaults to 3. + num_res_units: number of residual units. Defaults to 0. + inter_channels: sequence of channels defining the blocks in the intermediate layer between encode and decode. + inter_dilations: defines the dilation value for each block of the intermediate layer. Defaults to 1. + num_inter_units: number of residual units for each block of the intermediate layer. Defaults to 0. + act: activation type and arguments. Defaults to PReLU. + norm: feature normalization type and arguments. 
Defaults to instance norm. + dropout: dropout ratio. Defaults to no dropout. + bias: whether to have a bias term in convolution blocks. Defaults to True. + According to `Performance Tuning Guide `_, + if a conv layer is directly followed by a batch norm layer, bias should be False. + + .. deprecated:: 0.6.0 + ``dimensions`` is deprecated, use ``spatial_dims`` instead. + + Examples:: + + from monai.networks.nets import VarAutoEncoder + + # 3 layer network accepting images with dimensions (1, 32, 32) and using a latent vector with 2 values + model = VarAutoEncoder( + dimensions=2, + in_shape=(32, 32), # image spatial shape + out_channels=1, + latent_size=2, + channels=(16, 32, 64), + strides=(1, 2, 2), + ) + + see also: + - Variational autoencoder network with MedNIST Dataset + https://github.com/Project-MONAI/tutorials/blob/master/modules/varautoencoder_mednist.ipynb + """ + + @deprecated_arg( + name="dimensions", new_name="spatial_dims", since="0.6", msg_suffix="Please use `spatial_dims` instead." + ) + def __init__( + self, + spatial_dims: int, + in_shape: Sequence[int], + out_channels: int, + latent_size: int, + channels: Sequence[int], + strides: Sequence[int], + kernel_size: Union[Sequence[int], int] = 3, + up_kernel_size: Union[Sequence[int], int] = 3, + num_res_units: int = 0, + inter_channels: Optional[list] = None, + inter_dilations: Optional[list] = None, + num_inter_units: int = 2, + act: Optional[Union[Tuple, str]] = Act.PRELU, + norm: Union[Tuple, str] = Norm.INSTANCE, + dropout: Optional[Union[Tuple, str, float]] = None, + bias: bool = True, + dimensions: Optional[int] = None, + ) -> None: + + self.in_channels, *self.in_shape = in_shape + + self.latent_size = latent_size + self.final_size = np.asarray(self.in_shape, dtype=int) + if dimensions is not None: + spatial_dims = dimensions + + super().__init__( + spatial_dims, + self.in_channels, + out_channels, + channels, + strides, + kernel_size, + up_kernel_size, + num_res_units, + inter_channels, + inter_dilations, + num_inter_units, + act, + norm, + dropout, + bias, + ) + + padding = same_padding(self.kernel_size) + + for s in strides: + self.final_size = calculate_out_shape(self.final_size, self.kernel_size, s, padding) # type: ignore + + linear_size = int(np.product(self.final_size)) * self.encoded_channels + self.mu = nn.Linear(linear_size, self.latent_size) + self.logvar = nn.Linear(linear_size, self.latent_size) + self.decodeL = nn.Linear(self.latent_size, linear_size) + + def encode_forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: + x = self.encode(x) + x = self.intermediate(x) + x = x.view(x.shape[0], -1) + mu = self.mu(x) + logvar = self.logvar(x) + return mu, logvar + + def decode_forward(self, z: torch.Tensor, use_sigmoid: bool = True) -> torch.Tensor: + x = F.relu(self.decodeL(z)) + x = x.view(x.shape[0], self.channels[-1], *self.final_size) + x = self.decode(x) + if use_sigmoid: + x = torch.sigmoid(x) + return x + + def reparameterize(self, mu: torch.Tensor, logvar: torch.Tensor) -> torch.Tensor: + std = torch.exp(0.5 * logvar) + + if self.training: # multiply random noise with std only during training + std = torch.randn_like(std).mul(std) + + return std.add_(mu) + + def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: + mu, logvar = self.encode_forward(x) + z = self.reparameterize(mu, logvar) + return self.decode_forward(z), mu, logvar, z diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/vit.py 
b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/vit.py new file mode 100644 index 0000000000000000000000000000000000000000..a5f7963ecaa57e8dedfa8bd4ba86d28f1f369007 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/vit.py @@ -0,0 +1,114 @@ +# Copyright (c) MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from typing import Sequence, Union + +import torch +import torch.nn as nn + +from monai.networks.blocks.patchembedding import PatchEmbeddingBlock +from monai.networks.blocks.transformerblock import TransformerBlock + +__all__ = ["ViT"] + + +class ViT(nn.Module): + """ + Vision Transformer (ViT), based on: "Dosovitskiy et al., + An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale " + + ViT supports Torchscript but only works for Pytorch after 1.8. + """ + + def __init__( + self, + in_channels: int, + img_size: Union[Sequence[int], int], + patch_size: Union[Sequence[int], int], + hidden_size: int = 768, + mlp_dim: int = 3072, + num_layers: int = 12, + num_heads: int = 12, + pos_embed: str = "conv", + classification: bool = False, + num_classes: int = 2, + dropout_rate: float = 0.0, + spatial_dims: int = 3, + ) -> None: + """ + Args: + in_channels: dimension of input channels. + img_size: dimension of input image. + patch_size: dimension of patch size. + hidden_size: dimension of hidden layer. + mlp_dim: dimension of feedforward layer. + num_layers: number of transformer blocks. + num_heads: number of attention heads. + pos_embed: position embedding layer type. + classification: bool argument to determine if classification is used. + num_classes: number of classes if classification is used. + dropout_rate: faction of the input units to drop. + spatial_dims: number of spatial dimensions. 
+ + Examples:: + + # for single channel input with image size of (96,96,96), conv position embedding and segmentation backbone + >>> net = ViT(in_channels=1, img_size=(96,96,96), pos_embed='conv') + + # for 3-channel with image size of (128,128,128), 24 layers and classification backbone + >>> net = ViT(in_channels=3, img_size=(128,128,128), pos_embed='conv', classification=True) + + # for 3-channel with image size of (224,224), 12 layers and classification backbone + >>> net = ViT(in_channels=3, img_size=(224,224), pos_embed='conv', classification=True, spatial_dims=2) + + """ + + super().__init__() + + if not (0 <= dropout_rate <= 1): + raise ValueError("dropout_rate should be between 0 and 1.") + + if hidden_size % num_heads != 0: + raise ValueError("hidden_size should be divisible by num_heads.") + + self.classification = classification + self.patch_embedding = PatchEmbeddingBlock( + in_channels=in_channels, + img_size=img_size, + patch_size=patch_size, + hidden_size=hidden_size, + num_heads=num_heads, + pos_embed=pos_embed, + dropout_rate=dropout_rate, + spatial_dims=spatial_dims, + ) + self.blocks = nn.ModuleList( + [TransformerBlock(hidden_size, mlp_dim, num_heads, dropout_rate) for i in range(num_layers)] + ) + self.norm = nn.LayerNorm(hidden_size) + if self.classification: + self.cls_token = nn.Parameter(torch.zeros(1, 1, hidden_size)) + self.classification_head = nn.Sequential(nn.Linear(hidden_size, num_classes), nn.Tanh()) + + def forward(self, x): + x = self.patch_embedding(x) + if hasattr(self, "cls_token"): + cls_token = self.cls_token.expand(x.shape[0], -1, -1) + x = torch.cat((cls_token, x), dim=1) + hidden_states_out = [] + for blk in self.blocks: + x = blk(x) + hidden_states_out.append(x) + x = self.norm(x) + if hasattr(self, "classification_head"): + x = self.classification_head(x[:, 0]) + return x, hidden_states_out diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/vitautoenc.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/vitautoenc.py new file mode 100644 index 0000000000000000000000000000000000000000..9e5490f9d661cdc6a275171dd5969977cf431b5f --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/vitautoenc.py @@ -0,0 +1,121 @@ +# Copyright (c) MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
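The ViT added just above returns both the final token sequence and the output of every transformer block, which is the hook that UNETR (earlier in this diff) and ViTAutoEnc (next) rely on. A minimal sketch of that usage, assuming the vendored monai and torch packages in this sandbox environment are importable as laid out in the diff::

    import torch
    from monai.networks.nets.vit import ViT

    # classification=False (the default), so forward() returns the normalized
    # patch-token sequence plus the output of each of the 12 transformer blocks
    net = ViT(in_channels=1, img_size=(96, 96, 96), patch_size=(16, 16, 16), pos_embed="conv")
    tokens, hidden_states = net(torch.rand(1, 1, 96, 96, 96))

    assert tokens.shape == (1, 6 * 6 * 6, 768)  # (96/16)**3 patches, hidden_size 768
    assert len(hidden_states) == 12              # one entry per TransformerBlock
    skip = hidden_states[6]                      # mid-depth features, as UNETR taps at indices 3/6/9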
+ + +from typing import Sequence, Union + +import torch +import torch.nn as nn + +from monai.networks.blocks.patchembedding import PatchEmbeddingBlock +from monai.networks.blocks.transformerblock import TransformerBlock +from monai.networks.layers import Conv +from monai.utils import ensure_tuple_rep + +__all__ = ["ViTAutoEnc"] + + +class ViTAutoEnc(nn.Module): + """ + Vision Transformer (ViT), based on: "Dosovitskiy et al., + An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale " + + Modified to also give same dimension outputs as the input size of the image + """ + + def __init__( + self, + in_channels: int, + img_size: Union[Sequence[int], int], + patch_size: Union[Sequence[int], int], + out_channels: int = 1, + deconv_chns: int = 16, + hidden_size: int = 768, + mlp_dim: int = 3072, + num_layers: int = 12, + num_heads: int = 12, + pos_embed: str = "conv", + dropout_rate: float = 0.0, + spatial_dims: int = 3, + ) -> None: + """ + Args: + in_channels: dimension of input channels or the number of channels for input + img_size: dimension of input image. + patch_size: dimension of patch size. + hidden_size: dimension of hidden layer. + out_channels: number of output channels. + deconv_chns: number of channels for the deconvolution layers. + mlp_dim: dimension of feedforward layer. + num_layers: number of transformer blocks. + num_heads: number of attention heads. + pos_embed: position embedding layer type. + dropout_rate: faction of the input units to drop. + spatial_dims: number of spatial dimensions. + + Examples:: + + # for single channel input with image size of (96,96,96), conv position embedding and segmentation backbone + # It will provide an output of same size as that of the input + >>> net = ViTAutoEnc(in_channels=1, patch_size=(16,16,16), img_size=(96,96,96), pos_embed='conv') + + # for 3-channel with image size of (128,128,128), output will be same size as of input + >>> net = ViTAutoEnc(in_channels=3, patch_size=(16,16,16), img_size=(128,128,128), pos_embed='conv') + + """ + + super().__init__() + + self.patch_size = ensure_tuple_rep(patch_size, spatial_dims) + self.spatial_dims = spatial_dims + + self.patch_embedding = PatchEmbeddingBlock( + in_channels=in_channels, + img_size=img_size, + patch_size=patch_size, + hidden_size=hidden_size, + num_heads=num_heads, + pos_embed=pos_embed, + dropout_rate=dropout_rate, + spatial_dims=self.spatial_dims, + ) + self.blocks = nn.ModuleList( + [TransformerBlock(hidden_size, mlp_dim, num_heads, dropout_rate) for i in range(num_layers)] + ) + self.norm = nn.LayerNorm(hidden_size) + + new_patch_size = [4] * self.spatial_dims + conv_trans = Conv[Conv.CONVTRANS, self.spatial_dims] + # self.conv3d_transpose* is to be compatible with existing 3d model weights. + self.conv3d_transpose = conv_trans(hidden_size, deconv_chns, kernel_size=new_patch_size, stride=new_patch_size) + self.conv3d_transpose_1 = conv_trans( + in_channels=deconv_chns, out_channels=out_channels, kernel_size=new_patch_size, stride=new_patch_size + ) + + def forward(self, x): + """ + Args: + x: input tensor must have isotropic spatial dimensions, + such as ``[batch_size, channels, sp_size, sp_size[, sp_size]]``. 
+ """ + spatial_size = x.shape[2:] + x = self.patch_embedding(x) + hidden_states_out = [] + for blk in self.blocks: + x = blk(x) + hidden_states_out.append(x) + x = self.norm(x) + x = x.transpose(1, 2) + d = [s // p for s, p in zip(spatial_size, self.patch_size)] + x = torch.reshape(x, [x.shape[0], x.shape[1], *d]) + x = self.conv3d_transpose(x) + x = self.conv3d_transpose_1(x) + return x, hidden_states_out diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/vnet.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/vnet.py new file mode 100644 index 0000000000000000000000000000000000000000..7669b4678ee9776cb8a621e364027e38a7f3b5ee --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/networks/nets/vnet.py @@ -0,0 +1,265 @@ +# Copyright (c) MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Dict, Optional, Tuple, Type, Union + +import torch +import torch.nn as nn + +from monai.networks.blocks.convolutions import Convolution +from monai.networks.layers.factories import Act, Conv, Dropout, Norm, split_args + +__all__ = ["VNet"] + + +def get_acti_layer(act: Union[Tuple[str, Dict], str], nchan: int = 0): + if act == "prelu": + act = ("prelu", {"num_parameters": nchan}) + act_name, act_args = split_args(act) + act_type = Act[act_name] + return act_type(**act_args) + + +class LUConv(nn.Module): + def __init__(self, spatial_dims: int, nchan: int, act: Union[Tuple[str, Dict], str], bias: bool = False): + super().__init__() + + self.act_function = get_acti_layer(act, nchan) + self.conv_block = Convolution( + spatial_dims=spatial_dims, + in_channels=nchan, + out_channels=nchan, + kernel_size=5, + act=None, + norm=Norm.BATCH, + bias=bias, + ) + + def forward(self, x): + out = self.conv_block(x) + out = self.act_function(out) + return out + + +def _make_nconv(spatial_dims: int, nchan: int, depth: int, act: Union[Tuple[str, Dict], str], bias: bool = False): + layers = [] + for _ in range(depth): + layers.append(LUConv(spatial_dims, nchan, act, bias)) + return nn.Sequential(*layers) + + +class InputTransition(nn.Module): + def __init__( + self, + spatial_dims: int, + in_channels: int, + out_channels: int, + act: Union[Tuple[str, Dict], str], + bias: bool = False, + ): + super().__init__() + + if 16 % in_channels != 0: + raise ValueError(f"16 should be divisible by in_channels, got in_channels={in_channels}.") + + self.spatial_dims = spatial_dims + self.in_channels = in_channels + self.act_function = get_acti_layer(act, 16) + self.conv_block = Convolution( + spatial_dims=spatial_dims, + in_channels=in_channels, + out_channels=16, + kernel_size=5, + act=None, + norm=Norm.BATCH, + bias=bias, + ) + + def forward(self, x): + out = self.conv_block(x) + repeat_num = 16 // self.in_channels + x16 = x.repeat([1, repeat_num, 1, 1, 1][: self.spatial_dims + 2]) + out = self.act_function(torch.add(out, x16)) + return out + + +class DownTransition(nn.Module): + def 
__init__( + self, + spatial_dims: int, + in_channels: int, + nconvs: int, + act: Union[Tuple[str, Dict], str], + dropout_prob: Optional[float] = None, + dropout_dim: int = 3, + bias: bool = False, + ): + super().__init__() + + conv_type: Type[Union[nn.Conv2d, nn.Conv3d]] = Conv[Conv.CONV, spatial_dims] + norm_type: Type[Union[nn.BatchNorm2d, nn.BatchNorm3d]] = Norm[Norm.BATCH, spatial_dims] + dropout_type: Type[Union[nn.Dropout, nn.Dropout2d, nn.Dropout3d]] = Dropout[Dropout.DROPOUT, dropout_dim] + + out_channels = 2 * in_channels + self.down_conv = conv_type(in_channels, out_channels, kernel_size=2, stride=2, bias=bias) + self.bn1 = norm_type(out_channels) + self.act_function1 = get_acti_layer(act, out_channels) + self.act_function2 = get_acti_layer(act, out_channels) + self.ops = _make_nconv(spatial_dims, out_channels, nconvs, act, bias) + self.dropout = dropout_type(dropout_prob) if dropout_prob is not None else None + + def forward(self, x): + down = self.act_function1(self.bn1(self.down_conv(x))) + if self.dropout is not None: + out = self.dropout(down) + else: + out = down + out = self.ops(out) + out = self.act_function2(torch.add(out, down)) + return out + + +class UpTransition(nn.Module): + def __init__( + self, + spatial_dims: int, + in_channels: int, + out_channels: int, + nconvs: int, + act: Union[Tuple[str, Dict], str], + dropout_prob: Optional[float] = None, + dropout_dim: int = 3, + ): + super().__init__() + + conv_trans_type: Type[Union[nn.ConvTranspose2d, nn.ConvTranspose3d]] = Conv[Conv.CONVTRANS, spatial_dims] + norm_type: Type[Union[nn.BatchNorm2d, nn.BatchNorm3d]] = Norm[Norm.BATCH, spatial_dims] + dropout_type: Type[Union[nn.Dropout, nn.Dropout2d, nn.Dropout3d]] = Dropout[Dropout.DROPOUT, dropout_dim] + + self.up_conv = conv_trans_type(in_channels, out_channels // 2, kernel_size=2, stride=2) + self.bn1 = norm_type(out_channels // 2) + self.dropout = dropout_type(dropout_prob) if dropout_prob is not None else None + self.dropout2 = dropout_type(0.5) + self.act_function1 = get_acti_layer(act, out_channels // 2) + self.act_function2 = get_acti_layer(act, out_channels) + self.ops = _make_nconv(spatial_dims, out_channels, nconvs, act) + + def forward(self, x, skipx): + if self.dropout is not None: + out = self.dropout(x) + else: + out = x + skipxdo = self.dropout2(skipx) + out = self.act_function1(self.bn1(self.up_conv(out))) + xcat = torch.cat((out, skipxdo), 1) + out = self.ops(xcat) + out = self.act_function2(torch.add(out, xcat)) + return out + + +class OutputTransition(nn.Module): + def __init__( + self, + spatial_dims: int, + in_channels: int, + out_channels: int, + act: Union[Tuple[str, Dict], str], + bias: bool = False, + ): + super().__init__() + + conv_type: Type[Union[nn.Conv2d, nn.Conv3d]] = Conv[Conv.CONV, spatial_dims] + + self.act_function1 = get_acti_layer(act, out_channels) + self.conv_block = Convolution( + spatial_dims=spatial_dims, + in_channels=in_channels, + out_channels=out_channels, + kernel_size=5, + act=None, + norm=Norm.BATCH, + bias=bias, + ) + self.conv2 = conv_type(out_channels, out_channels, kernel_size=1) + + def forward(self, x): + # convolve 32 down to 2 channels + out = self.conv_block(x) + out = self.act_function1(out) + out = self.conv2(out) + return out + + +class VNet(nn.Module): + """ + V-Net based on `Fully Convolutional Neural Networks for Volumetric Medical Image Segmentation + `_. + Adapted from `the official Caffe implementation + `_. and `another pytorch implementation + `_. + The model supports 2D or 3D inputs. 
+ + Args: + spatial_dims: spatial dimension of the input data. Defaults to 3. + in_channels: number of input channels for the network. Defaults to 1. + The value should meet the condition that ``16 % in_channels == 0``. + out_channels: number of output channels for the network. Defaults to 1. + act: activation type in the network. Defaults to ``("elu", {"inplace": True})``. + dropout_prob: dropout ratio. Defaults to 0.5. + dropout_dim: determine the dimensions of dropout. Defaults to 3. + + - ``dropout_dim = 1``, randomly zeroes some of the elements for each channel. + - ``dropout_dim = 2``, Randomly zeroes out entire channels (a channel is a 2D feature map). + - ``dropout_dim = 3``, Randomly zeroes out entire channels (a channel is a 3D feature map). + bias: whether to have a bias term in convolution blocks. Defaults to False. + According to `Performance Tuning Guide `_, + if a conv layer is directly followed by a batch norm layer, bias should be False. + + """ + + def __init__( + self, + spatial_dims: int = 3, + in_channels: int = 1, + out_channels: int = 1, + act: Union[Tuple[str, Dict], str] = ("elu", {"inplace": True}), + dropout_prob: float = 0.5, + dropout_dim: int = 3, + bias: bool = False, + ): + super().__init__() + + if spatial_dims not in (2, 3): + raise AssertionError("spatial_dims can only be 2 or 3.") + + self.in_tr = InputTransition(spatial_dims, in_channels, 16, act, bias=bias) + self.down_tr32 = DownTransition(spatial_dims, 16, 1, act, bias=bias) + self.down_tr64 = DownTransition(spatial_dims, 32, 2, act, bias=bias) + self.down_tr128 = DownTransition(spatial_dims, 64, 3, act, dropout_prob=dropout_prob, bias=bias) + self.down_tr256 = DownTransition(spatial_dims, 128, 2, act, dropout_prob=dropout_prob, bias=bias) + self.up_tr256 = UpTransition(spatial_dims, 256, 256, 2, act, dropout_prob=dropout_prob) + self.up_tr128 = UpTransition(spatial_dims, 256, 128, 2, act, dropout_prob=dropout_prob) + self.up_tr64 = UpTransition(spatial_dims, 128, 64, 1, act) + self.up_tr32 = UpTransition(spatial_dims, 64, 32, 1, act) + self.out_tr = OutputTransition(spatial_dims, 32, out_channels, act, bias=bias) + + def forward(self, x): + out16 = self.in_tr(x) + out32 = self.down_tr32(out16) + out64 = self.down_tr64(out32) + out128 = self.down_tr128(out64) + out256 = self.down_tr256(out128) + x = self.up_tr256(out256, out128) + x = self.up_tr128(x, out64) + x = self.up_tr64(x, out32) + x = self.up_tr32(x, out16) + x = self.out_tr(x) + return x diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/optimizers/__pycache__/__init__.cpython-38.pyc b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/optimizers/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..582ee31489180a354555777acea620a76ac8b458 Binary files /dev/null and b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/optimizers/__pycache__/__init__.cpython-38.pyc differ diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/optimizers/__pycache__/lr_finder.cpython-38.pyc b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/optimizers/__pycache__/lr_finder.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d40a4f99a09176891cc92b07df561ea17e187ea7 Binary files /dev/null and b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/optimizers/__pycache__/lr_finder.cpython-38.pyc differ diff --git 
a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/optimizers/__pycache__/lr_scheduler.cpython-38.pyc b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/optimizers/__pycache__/lr_scheduler.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c7bd44bc243be2319b06d4d258983fe5ec6f5c47 Binary files /dev/null and b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/optimizers/__pycache__/lr_scheduler.cpython-38.pyc differ diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/optimizers/__pycache__/novograd.cpython-38.pyc b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/optimizers/__pycache__/novograd.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fb11e5270c742bf517246bacf9fe8be1d862a857 Binary files /dev/null and b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/optimizers/__pycache__/novograd.cpython-38.pyc differ diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/optimizers/__pycache__/utils.cpython-38.pyc b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/optimizers/__pycache__/utils.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..66d92a47b71a3b62f3320711dac8c53dc5e0446b Binary files /dev/null and b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/monai/optimizers/__pycache__/utils.cpython-38.pyc differ diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/torchvision.libs/libnvjpeg.90286a3c.so.11 b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/torchvision.libs/libnvjpeg.90286a3c.so.11 new file mode 100644 index 0000000000000000000000000000000000000000..fbe879b81ba90dc66bb478c844ce4c0ca6c99080 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/torchvision.libs/libnvjpeg.90286a3c.so.11 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2ac7412be9b308d833d0dcc4e7c77657d04ca31cdaecc37153e85b6c8969b2e +size 5161560
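Taken together, this part of the diff vendors the MONAI segmentation networks (UNet, UNETR, VNet, ViT, ViTAutoEnc, VarAutoEncoder) plus optimizer bytecode and LFS-tracked native libraries into the sandboxed Anaconda environment. A short smoke-test sketch, assuming that environment's python3.8 with the vendored torch and monai on the path, illustrates the input-size constraints spelled out in the docstrings above: UNet needs spatial sizes divisible by the product of its strides, UNETR inherits the fixed 16-voxel ViT patch, and VNet halves the volume four times, so sizes must be divisible by 16::

    import torch
    from monai.networks.nets.unet import UNet
    from monai.networks.nets.unetr import UNETR
    from monai.networks.nets.vnet import VNet

    # 2D UNet from its first docstring example: two stride-2 layers, so H and W
    # must be multiples of 2**2 = 4 for the decode path to restore the input shape
    unet = UNet(spatial_dims=2, in_channels=1, out_channels=1,
                channels=(4, 8, 16), strides=(2, 2), num_res_units=2)
    assert unet(torch.rand(1, 1, 96, 96)).shape == (1, 1, 96, 96)

    # UNETR from its docstring example: ViT backbone with a fixed 16-voxel patch,
    # so every spatial dimension of img_size (and of the input) must be a multiple of 16
    unetr = UNETR(in_channels=1, out_channels=4, img_size=(96, 96, 96),
                  feature_size=32, norm_name="batch")
    assert unetr(torch.rand(1, 1, 96, 96, 96)).shape == (1, 4, 96, 96, 96)

    # VNet with its defaults: four DownTransitions each halve the volume, so sizes
    # must be divisible by 16, and 16 % in_channels must be 0
    vnet = VNet(spatial_dims=3, in_channels=1, out_channels=2)
    assert vnet(torch.rand(1, 1, 64, 64, 64)).shape == (1, 2, 64, 64, 64)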