Kim Mạnh Hưng
committed on
Commit
·
aa04f76
1
Parent(s):
0e003ab
Add U-Net app and weights
Browse files- .gitignore +3 -0
- app.py +118 -0
- configs/isic/isic2018_attunet.yaml +50 -0
- configs/isic/isic2018_missformer.yaml +52 -0
- configs/isic/isic2018_multiresunet.yaml +51 -0
- configs/isic/isic2018_resunet.yaml +50 -0
- configs/isic/isic2018_transunet.yaml +52 -0
- configs/isic/isic2018_uctransnet.yaml +50 -0
- configs/isic/isic2018_unet.yaml +51 -0
- configs/isic/isic2018_unetpp.yaml +51 -0
- configs/segpc/segpc2021_attunet.yaml +47 -0
- configs/segpc/segpc2021_missformer.yaml +49 -0
- configs/segpc/segpc2021_multiresunet.yaml +53 -0
- configs/segpc/segpc2021_resunet.yaml +47 -0
- configs/segpc/segpc2021_transunet.yaml +52 -0
- configs/segpc/segpc2021_uctransnet.yaml +47 -0
- configs/segpc/segpc2021_unet.yaml +48 -0
- configs/segpc/segpc2021_unetpp.yaml +48 -0
- models/__init__.py +0 -0
- models/_missformer/MISSFormer.py +398 -0
- models/_missformer/__init__.py +0 -0
- models/_missformer/segformer.py +557 -0
- models/_resunet/__init__.py +0 -0
- models/_resunet/modules.py +143 -0
- models/_resunet/res_unet.py +65 -0
- models/_transunet/vit_seg_configs.py +130 -0
- models/_transunet/vit_seg_modeling.py +453 -0
- models/_transunet/vit_seg_modeling_c4.py +453 -0
- models/_transunet/vit_seg_modeling_resnet_skip.py +160 -0
- models/_transunet/vit_seg_modeling_resnet_skip_c4.py +160 -0
- models/_uctransnet/CTrans.py +365 -0
- models/_uctransnet/Config.py +72 -0
- models/_uctransnet/UCTransNet.py +139 -0
- models/_uctransnet/UNet.py +111 -0
- models/attunet.py +427 -0
- models/multiresunet.py +190 -0
- models/unet.py +64 -0
- models/unetpp.py +141 -0
- requirements.txt +6 -0
- saved_models/isic2018_unet/best_model_state_dict.pt +3 -0
- saved_models/segpc2021_unet/best_model_state_dict.pt +3 -0
.gitignore
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
__pycache__/
|
| 2 |
+
*.pyc
|
| 3 |
+
*.pyo
|
app.py
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import torch
|
| 3 |
+
import numpy as np
|
| 4 |
+
from PIL import Image
|
| 5 |
+
import yaml
|
| 6 |
+
import os
|
| 7 |
+
from models.unet import UNet
|
| 8 |
+
|
| 9 |
+
# Configuration
|
| 10 |
+
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
| 11 |
+
# Map dataset names to config/model paths
|
| 12 |
+
CONFIG_PATHS = {
|
| 13 |
+
'isic': './configs/isic/isic2018_unet.yaml',
|
| 14 |
+
'segpc': './configs/segpc/segpc2021_unet.yaml'
|
| 15 |
+
}
|
| 16 |
+
MODEL_PATHS = {
|
| 17 |
+
'isic': './saved_models/isic2018_unet/best_model_state_dict.pt',
|
| 18 |
+
'segpc': './saved_models/segpc2021_unet/best_model_state_dict.pt'
|
| 19 |
+
}
|
| 20 |
+
|
| 21 |
+
def load_config(config_path):
    """Read a YAML config file and return its parsed contents.

    Uses yaml.safe_load, so only plain YAML data types are constructed.
    """
    with open(config_path, 'r') as stream:
        parsed = yaml.safe_load(stream)
    return parsed
|
| 24 |
+
|
| 25 |
+
def load_model(dataset_name):
    """Build the UNet for `dataset_name` and load its saved weights.

    Args:
        dataset_name: key into CONFIG_PATHS / MODEL_PATHS ('isic' or 'segpc').

    Returns:
        A UNet on DEVICE in eval mode. If the checkpoint file is missing,
        a warning is printed and the model keeps its random init weights.
    """
    config = load_config(CONFIG_PATHS[dataset_name])
    # BUG FIX: the channel counts live under model.params in the YAML
    # configs (model -> params -> in_channels/out_channels), not directly
    # under model. The old config['model']['in_channels'] raised KeyError,
    # which the caller's try/except silently swallowed, leaving the app
    # with no usable models.
    model_params = config['model']['params']
    model = UNet(
        in_channels=model_params['in_channels'],
        out_channels=model_params['out_channels']
    )
    model_path = MODEL_PATHS[dataset_name]
    if os.path.exists(model_path):
        # map_location lets CPU-only hosts load checkpoints saved on GPU.
        state_dict = torch.load(model_path, map_location=DEVICE)
        model.load_state_dict(state_dict)
        print(f"Loaded model for {dataset_name} from {model_path}")
    else:
        print(f"Warning: Model weights not found for {dataset_name} at {model_path}")

    model.to(DEVICE)
    model.eval()
    return model
|
| 42 |
+
|
| 43 |
+
# Load models once (cache them)
# A failed load is reported but non-fatal, so one broken checkpoint does
# not take down the whole demo; predict() checks membership in `models`.
models = {}
for ds in ['isic', 'segpc']:
    try:
        loaded = load_model(ds)
    except Exception as e:
        print(f"Error loading model {ds}: {e}")
    else:
        models[ds] = loaded
|
| 50 |
+
|
| 51 |
+
def predict(image, dataset_choice):
    """Segment `image` with the model selected by `dataset_choice`.

    Args:
        image: PIL image from the Gradio widget (may be None).
        dataset_choice: 'isic' or 'segpc'; must be a key of `models`.

    Returns:
        A 224x224x3 uint8 numpy array with the predicted mask blended in
        green over the resized input, or None if the input or model is
        unavailable.
    """
    if image is None:
        return None

    if dataset_choice not in models:
        return None

    model = models[dataset_choice]

    # BUG FIX: normalize the PIL mode up front. Grayscale ('L') uploads
    # previously crashed (2-D array has no channel axis for permute), and
    # RGBA uploads crashed the overlay assignment below (4 channels vs a
    # 3-element color). convert('RGB') handles both; RGB inputs pass
    # through unchanged.
    image = image.convert('RGB')

    # Preprocess
    # Resize to 224x224 as per config; scale pixels to [0, 1].
    img_resized = image.resize((224, 224))
    img_np = np.array(img_resized).astype(np.float32) / 255.0

    # Handle channels
    if dataset_choice == 'isic':
        # ISIC: 3 channels (RGB) -- guaranteed by the convert above.
        img_tensor = torch.from_numpy(img_np).permute(2, 0, 1).unsqueeze(0).float()
    else:
        # SegPC: the model was trained on 4-channel input; pad a zero 4th
        # channel onto RGB. NOTE(review): the training pipeline's 4th
        # channel is presumably a cell/nucleus mask -- a zero channel is
        # only an approximation; confirm against the dataset loader.
        padding = np.zeros((224, 224, 1), dtype=np.float32)
        img_np = np.concatenate([img_np, padding], axis=-1)
        img_tensor = torch.from_numpy(img_np).permute(2, 0, 1).unsqueeze(0).float()

    img_tensor = img_tensor.to(DEVICE)

    with torch.no_grad():
        output = model(img_tensor)
        probs = torch.sigmoid(output)
        # Threshold channel 0 of the output. NOTE(review): the configs set
        # out_channels: 2; verify channel 0 is the foreground class.
        pred_mask = (probs > 0.5).float().cpu().numpy()[0, 0]

    # Post-process for visualization: blend the binary mask in green
    # over the resized input.
    base_img = np.array(img_resized)
    overlay = base_img.copy()

    mask_bool = pred_mask > 0
    overlay[mask_bool] = [0, 255, 0]  # Make Green

    # Blend
    final_img = (0.6 * base_img + 0.4 * overlay).astype(np.uint8)

    return final_img
|
| 100 |
+
|
| 101 |
+
# Interface
# Two inputs: the image to segment and a radio choosing which cached
# model to run; the output is the green-overlay visualization.
_image_input = gr.Image(type="pil", label="Input Image")
_model_input = gr.Radio(["isic", "segpc"], label="Dataset Model", value="isic")

iface = gr.Interface(
    fn=predict,
    inputs=[_image_input, _model_input],
    outputs=gr.Image(type="numpy", label="Prediction Overlay"),
    title="Medical Image Segmentation (Awesome-U-Net)",
    description="Upload an image to segment skin lesions (ISIC) or cells (SegPC).",
    # No bundled samples yet; add paths here when example images ship,
    # e.g. ["dataset_examples/isic_sample.jpg", "isic"].
    examples=[],
)
|
| 116 |
+
|
| 117 |
+
# Script entry point: start the Gradio web server when run directly
# (not when app.py is imported as a module).
if __name__ == "__main__":
    iface.launch()
|
configs/isic/isic2018_attunet.yaml
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
run:
|
| 2 |
+
mode: 'train'
|
| 3 |
+
device: 'gpu'
|
| 4 |
+
transforms: none
|
| 5 |
+
dataset:
|
| 6 |
+
class_name: "ISIC2018Dataset"
|
| 7 |
+
input_size: 224
|
| 8 |
+
training:
|
| 9 |
+
params:
|
| 10 |
+
data_dir: "/path/to/datasets/ISIC2018"
|
| 11 |
+
validation:
|
| 12 |
+
params:
|
| 13 |
+
data_dir: "/path/to/datasets/ISIC2018"
|
| 14 |
+
number_classes: 2
|
| 15 |
+
data_loader:
|
| 16 |
+
train:
|
| 17 |
+
batch_size: 16
|
| 18 |
+
shuffle: true
|
| 19 |
+
num_workers: 8
|
| 20 |
+
pin_memory: true
|
| 21 |
+
validation:
|
| 22 |
+
batch_size: 16
|
| 23 |
+
shuffle: false
|
| 24 |
+
num_workers: 8
|
| 25 |
+
pin_memory: true
|
| 26 |
+
test:
|
| 27 |
+
batch_size: 16
|
| 28 |
+
shuffle: false
|
| 29 |
+
num_workers: 4
|
| 30 |
+
pin_memory: false
|
| 31 |
+
training:
|
| 32 |
+
optimizer:
|
| 33 |
+
name: 'Adam'
|
| 34 |
+
params:
|
| 35 |
+
lr: 0.0001
|
| 36 |
+
criterion:
|
| 37 |
+
name: "DiceLoss"
|
| 38 |
+
params: {}
|
| 39 |
+
scheduler:
|
| 40 |
+
factor: 0.5
|
| 41 |
+
patience: 10
|
| 42 |
+
epochs: 100
|
| 43 |
+
model:
|
| 44 |
+
save_dir: '../../saved_models/isic2018_attunet'
|
| 45 |
+
load_weights: false
|
| 46 |
+
name: 'AttU_Net'
|
| 47 |
+
params:
|
| 48 |
+
img_ch: 3
|
| 49 |
+
output_ch: 2
|
| 50 |
+
# preprocess:
|
configs/isic/isic2018_missformer.yaml
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
run:
|
| 2 |
+
mode: 'train'
|
| 3 |
+
device: 'gpu'
|
| 4 |
+
transforms: none
|
| 5 |
+
dataset:
|
| 6 |
+
class_name: "ISIC2018Dataset"
|
| 7 |
+
input_size: 224
|
| 8 |
+
training:
|
| 9 |
+
params:
|
| 10 |
+
data_dir: "/path/to/datasets/ISIC2018"
|
| 11 |
+
validation:
|
| 12 |
+
params:
|
| 13 |
+
data_dir: "/path/to/datasets/ISIC2018"
|
| 14 |
+
number_classes: 2
|
| 15 |
+
data_loader:
|
| 16 |
+
train:
|
| 17 |
+
batch_size: 16
|
| 18 |
+
shuffle: true
|
| 19 |
+
num_workers: 8
|
| 20 |
+
pin_memory: true
|
| 21 |
+
validation:
|
| 22 |
+
batch_size: 16
|
| 23 |
+
shuffle: false
|
| 24 |
+
num_workers: 8
|
| 25 |
+
pin_memory: true
|
| 26 |
+
test:
|
| 27 |
+
batch_size: 16
|
| 28 |
+
shuffle: false
|
| 29 |
+
num_workers: 4
|
| 30 |
+
pin_memory: false
|
| 31 |
+
training:
|
| 32 |
+
optimizer:
|
| 33 |
+
name: 'SGD'
|
| 34 |
+
params:
|
| 35 |
+
lr: 0.0001
|
| 36 |
+
momentum: 0.9
|
| 37 |
+
weight_decay: 0.0001
|
| 38 |
+
criterion:
|
| 39 |
+
name: "DiceLoss"
|
| 40 |
+
params: {}
|
| 41 |
+
scheduler:
|
| 42 |
+
factor: 0.5
|
| 43 |
+
patience: 10
|
| 44 |
+
epochs: 300
|
| 45 |
+
model:
|
| 46 |
+
save_dir: '../../saved_models/isic2018_missformer'
|
| 47 |
+
load_weights: false
|
| 48 |
+
name: "MISSFormer"
|
| 49 |
+
params:
|
| 50 |
+
in_ch: 3
|
| 51 |
+
num_classes: 2
|
| 52 |
+
# preprocess:
|
configs/isic/isic2018_multiresunet.yaml
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
run:
|
| 2 |
+
mode: 'train'
|
| 3 |
+
device: 'gpu'
|
| 4 |
+
transforms: none
|
| 5 |
+
dataset:
|
| 6 |
+
class_name: "ISIC2018Dataset"
|
| 7 |
+
input_size: 224
|
| 8 |
+
training:
|
| 9 |
+
params:
|
| 10 |
+
data_dir: "/path/to/datasets/ISIC2018"
|
| 11 |
+
validation:
|
| 12 |
+
params:
|
| 13 |
+
data_dir: "/path/to/datasets/ISIC2018"
|
| 14 |
+
number_classes: 2
|
| 15 |
+
data_loader:
|
| 16 |
+
train:
|
| 17 |
+
batch_size: 16
|
| 18 |
+
shuffle: true
|
| 19 |
+
num_workers: 2
|
| 20 |
+
pin_memory: true
|
| 21 |
+
validation:
|
| 22 |
+
batch_size: 16
|
| 23 |
+
shuffle: false
|
| 24 |
+
num_workers: 2
|
| 25 |
+
pin_memory: true
|
| 26 |
+
test:
|
| 27 |
+
batch_size: 16
|
| 28 |
+
shuffle: false
|
| 29 |
+
num_workers: 2
|
| 30 |
+
pin_memory: false
|
| 31 |
+
training:
|
| 32 |
+
optimizer:
|
| 33 |
+
name: 'Adam'
|
| 34 |
+
params:
|
| 35 |
+
lr: 0.0005
|
| 36 |
+
criterion:
|
| 37 |
+
name: "DiceLoss"
|
| 38 |
+
params: {}
|
| 39 |
+
scheduler:
|
| 40 |
+
factor: 0.5
|
| 41 |
+
patience: 10
|
| 42 |
+
epochs: 100
|
| 43 |
+
model:
|
| 44 |
+
save_dir: '../../saved_models/isic2018_multiresunet'
|
| 45 |
+
load_weights: false
|
| 46 |
+
name: 'MultiResUnet'
|
| 47 |
+
params:
|
| 48 |
+
channels: 3
|
| 49 |
+
filters: 32
|
| 50 |
+
nclasses: 2
|
| 51 |
+
# preprocess:
|
configs/isic/isic2018_resunet.yaml
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
run:
|
| 2 |
+
mode: 'train'
|
| 3 |
+
device: 'gpu'
|
| 4 |
+
transforms: none
|
| 5 |
+
dataset:
|
| 6 |
+
class_name: "ISIC2018Dataset"
|
| 7 |
+
input_size: 224
|
| 8 |
+
training:
|
| 9 |
+
params:
|
| 10 |
+
data_dir: "/path/to/datasets/ISIC2018"
|
| 11 |
+
validation:
|
| 12 |
+
params:
|
| 13 |
+
data_dir: "/path/to/datasets/ISIC2018"
|
| 14 |
+
number_classes: 2
|
| 15 |
+
data_loader:
|
| 16 |
+
train:
|
| 17 |
+
batch_size: 16
|
| 18 |
+
shuffle: true
|
| 19 |
+
num_workers: 8
|
| 20 |
+
pin_memory: true
|
| 21 |
+
validation:
|
| 22 |
+
batch_size: 16
|
| 23 |
+
shuffle: false
|
| 24 |
+
num_workers: 8
|
| 25 |
+
pin_memory: true
|
| 26 |
+
test:
|
| 27 |
+
batch_size: 16
|
| 28 |
+
shuffle: false
|
| 29 |
+
num_workers: 4
|
| 30 |
+
pin_memory: false
|
| 31 |
+
training:
|
| 32 |
+
optimizer:
|
| 33 |
+
name: 'Adam'
|
| 34 |
+
params:
|
| 35 |
+
lr: 0.0001
|
| 36 |
+
criterion:
|
| 37 |
+
name: "DiceLoss"
|
| 38 |
+
params: {}
|
| 39 |
+
scheduler:
|
| 40 |
+
factor: 0.5
|
| 41 |
+
patience: 10
|
| 42 |
+
epochs: 100
|
| 43 |
+
model:
|
| 44 |
+
save_dir: '../../saved_models/isic2018_resunet'
|
| 45 |
+
load_weights: false
|
| 46 |
+
name: 'ResUnet'
|
| 47 |
+
params:
|
| 48 |
+
in_ch: 3
|
| 49 |
+
out_ch: 2
|
| 50 |
+
# preprocess:
|
configs/isic/isic2018_transunet.yaml
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
run:
|
| 2 |
+
mode: 'train'
|
| 3 |
+
device: 'gpu'
|
| 4 |
+
transforms: none
|
| 5 |
+
dataset:
|
| 6 |
+
class_name: "ISIC2018Dataset"
|
| 7 |
+
input_size: 224
|
| 8 |
+
training:
|
| 9 |
+
params:
|
| 10 |
+
data_dir: "/path/to/datasets/ISIC2018"
|
| 11 |
+
validation:
|
| 12 |
+
params:
|
| 13 |
+
data_dir: "/path/to/datasets/ISIC2018"
|
| 14 |
+
number_classes: 2
|
| 15 |
+
data_loader:
|
| 16 |
+
train:
|
| 17 |
+
batch_size: 16
|
| 18 |
+
shuffle: true
|
| 19 |
+
num_workers: 8
|
| 20 |
+
pin_memory: true
|
| 21 |
+
validation:
|
| 22 |
+
batch_size: 16
|
| 23 |
+
shuffle: false
|
| 24 |
+
num_workers: 8
|
| 25 |
+
pin_memory: true
|
| 26 |
+
test:
|
| 27 |
+
batch_size: 16
|
| 28 |
+
shuffle: false
|
| 29 |
+
num_workers: 4
|
| 30 |
+
pin_memory: false
|
| 31 |
+
training:
|
| 32 |
+
optimizer:
|
| 33 |
+
name: 'SGD'
|
| 34 |
+
params:
|
| 35 |
+
lr: 0.0001
|
| 36 |
+
momentum: 0.9
|
| 37 |
+
weight_decay: 0.0001
|
| 38 |
+
criterion:
|
| 39 |
+
name: "DiceLoss"
|
| 40 |
+
params: {}
|
| 41 |
+
scheduler:
|
| 42 |
+
factor: 0.5
|
| 43 |
+
patience: 10
|
| 44 |
+
epochs: 100
|
| 45 |
+
model:
|
| 46 |
+
save_dir: '../../saved_models/isic2018_transunet'
|
| 47 |
+
load_weights: false
|
| 48 |
+
name: 'VisionTransformer'
|
| 49 |
+
params:
|
| 50 |
+
img_size: 224
|
| 51 |
+
num_classes: 2
|
| 52 |
+
# preprocess:
|
configs/isic/isic2018_uctransnet.yaml
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
run:
|
| 2 |
+
mode: 'train'
|
| 3 |
+
device: 'gpu'
|
| 4 |
+
transforms: none
|
| 5 |
+
dataset:
|
| 6 |
+
class_name: "ISIC2018Dataset"
|
| 7 |
+
input_size: 224
|
| 8 |
+
training:
|
| 9 |
+
params:
|
| 10 |
+
data_dir: "/path/to/datasets/ISIC2018"
|
| 11 |
+
validation:
|
| 12 |
+
params:
|
| 13 |
+
data_dir: "/path/to/datasets/ISIC2018"
|
| 14 |
+
number_classes: 2
|
| 15 |
+
data_loader:
|
| 16 |
+
train:
|
| 17 |
+
batch_size: 16
|
| 18 |
+
shuffle: true
|
| 19 |
+
num_workers: 8
|
| 20 |
+
pin_memory: true
|
| 21 |
+
validation:
|
| 22 |
+
batch_size: 16
|
| 23 |
+
shuffle: false
|
| 24 |
+
num_workers: 8
|
| 25 |
+
pin_memory: true
|
| 26 |
+
test:
|
| 27 |
+
batch_size: 16
|
| 28 |
+
shuffle: false
|
| 29 |
+
num_workers: 4
|
| 30 |
+
pin_memory: false
|
| 31 |
+
training:
|
| 32 |
+
optimizer:
|
| 33 |
+
name: 'Adam'
|
| 34 |
+
params:
|
| 35 |
+
lr: 0.0001
|
| 36 |
+
criterion:
|
| 37 |
+
name: "DiceLoss"
|
| 38 |
+
params: {}
|
| 39 |
+
scheduler:
|
| 40 |
+
factor: 0.5
|
| 41 |
+
patience: 10
|
| 42 |
+
epochs: 100
|
| 43 |
+
model:
|
| 44 |
+
save_dir: '../../saved_models/isic2018_uctransnet'
|
| 45 |
+
load_weights: false
|
| 46 |
+
name: "UCTransNet"
|
| 47 |
+
params:
|
| 48 |
+
n_channels: 3
|
| 49 |
+
n_classes: 2
|
| 50 |
+
# preprocess:
|
configs/isic/isic2018_unet.yaml
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
run:
|
| 2 |
+
mode: 'train'
|
| 3 |
+
device: 'gpu'
|
| 4 |
+
transforms: none
|
| 5 |
+
dataset:
|
| 6 |
+
class_name: "ISIC2018Dataset"
|
| 7 |
+
input_size: 224
|
| 8 |
+
training:
|
| 9 |
+
params:
|
| 10 |
+
data_dir: "./datasets/ISIC2018"
|
| 11 |
+
validation:
|
| 12 |
+
params:
|
| 13 |
+
data_dir: "./datasets/ISIC2018"
|
| 14 |
+
number_classes: 2
|
| 15 |
+
data_loader:
|
| 16 |
+
train:
|
| 17 |
+
batch_size: 16
|
| 18 |
+
shuffle: true
|
| 19 |
+
num_workers: 0
|
| 20 |
+
pin_memory: true
|
| 21 |
+
validation:
|
| 22 |
+
batch_size: 16
|
| 23 |
+
shuffle: false
|
| 24 |
+
num_workers: 0
|
| 25 |
+
pin_memory: true
|
| 26 |
+
test:
|
| 27 |
+
batch_size: 16
|
| 28 |
+
shuffle: false
|
| 29 |
+
num_workers: 0
|
| 30 |
+
pin_memory: false
|
| 31 |
+
training:
|
| 32 |
+
optimizer:
|
| 33 |
+
name: 'Adam'
|
| 34 |
+
params:
|
| 35 |
+
lr: 0.0001
|
| 36 |
+
criterion:
|
| 37 |
+
name: "DiceLoss"
|
| 38 |
+
params: {}
|
| 39 |
+
scheduler:
|
| 40 |
+
factor: 0.5
|
| 41 |
+
patience: 10
|
| 42 |
+
epochs: 2
|
| 43 |
+
model:
|
| 44 |
+
save_dir: './saved_models/isic2018_unet'
|
| 45 |
+
load_weights: false
|
| 46 |
+
name: 'UNet'
|
| 47 |
+
params:
|
| 48 |
+
in_channels: 3
|
| 49 |
+
out_channels: 2
|
| 50 |
+
with_bn: false
|
| 51 |
+
# preprocess:
|
configs/isic/isic2018_unetpp.yaml
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
run:
|
| 2 |
+
mode: 'train'
|
| 3 |
+
device: 'gpu'
|
| 4 |
+
transforms: none
|
| 5 |
+
dataset:
|
| 6 |
+
class_name: "ISIC2018Dataset"
|
| 7 |
+
input_size: 224
|
| 8 |
+
training:
|
| 9 |
+
params:
|
| 10 |
+
data_dir: "/path/to/datasets/ISIC2018"
|
| 11 |
+
validation:
|
| 12 |
+
params:
|
| 13 |
+
data_dir: "/path/to/datasets/ISIC2018"
|
| 14 |
+
number_classes: 2
|
| 15 |
+
data_loader:
|
| 16 |
+
train:
|
| 17 |
+
batch_size: 16
|
| 18 |
+
shuffle: true
|
| 19 |
+
num_workers: 8
|
| 20 |
+
pin_memory: true
|
| 21 |
+
validation:
|
| 22 |
+
batch_size: 16
|
| 23 |
+
shuffle: false
|
| 24 |
+
num_workers: 8
|
| 25 |
+
pin_memory: true
|
| 26 |
+
test:
|
| 27 |
+
batch_size: 16
|
| 28 |
+
shuffle: false
|
| 29 |
+
num_workers: 4
|
| 30 |
+
pin_memory: false
|
| 31 |
+
training:
|
| 32 |
+
optimizer:
|
| 33 |
+
name: 'Adam'
|
| 34 |
+
params:
|
| 35 |
+
lr: 0.0001
|
| 36 |
+
criterion:
|
| 37 |
+
name: "DiceLoss"
|
| 38 |
+
params: {}
|
| 39 |
+
scheduler:
|
| 40 |
+
factor: 0.5
|
| 41 |
+
patience: 10
|
| 42 |
+
epochs: 100
|
| 43 |
+
model:
|
| 44 |
+
save_dir: '../../saved_models/isic2018_unetpp'
|
| 45 |
+
load_weights: false
|
| 46 |
+
name: 'NestedUNet'
|
| 47 |
+
params:
|
| 48 |
+
num_classes: 2
|
| 49 |
+
input_channels: 3
|
| 50 |
+
deep_supervision: false
|
| 51 |
+
# preprocess:
|
configs/segpc/segpc2021_attunet.yaml
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
run:
|
| 2 |
+
mode: 'train'
|
| 3 |
+
device: 'gpu'
|
| 4 |
+
transforms: none
|
| 5 |
+
dataset:
|
| 6 |
+
class_name: "SegPC2021Dataset"
|
| 7 |
+
input_size: 224
|
| 8 |
+
scale: 2.5
|
| 9 |
+
data_dir: "/path/to/datasets/segpc/np"
|
| 10 |
+
dataset_dir: "/path/to/datasets/segpc/TCIA_SegPC_dataset"
|
| 11 |
+
number_classes: 2
|
| 12 |
+
data_loader:
|
| 13 |
+
train:
|
| 14 |
+
batch_size: 16
|
| 15 |
+
shuffle: true
|
| 16 |
+
num_workers: 4
|
| 17 |
+
pin_memory: true
|
| 18 |
+
validation:
|
| 19 |
+
batch_size: 16
|
| 20 |
+
shuffle: false
|
| 21 |
+
num_workers: 4
|
| 22 |
+
pin_memory: true
|
| 23 |
+
test:
|
| 24 |
+
batch_size: 16
|
| 25 |
+
shuffle: false
|
| 26 |
+
num_workers: 4
|
| 27 |
+
pin_memory: false
|
| 28 |
+
training:
|
| 29 |
+
optimizer:
|
| 30 |
+
name: 'Adam'
|
| 31 |
+
params:
|
| 32 |
+
lr: 0.0001
|
| 33 |
+
criterion:
|
| 34 |
+
name: "DiceLoss"
|
| 35 |
+
params: {}
|
| 36 |
+
scheduler:
|
| 37 |
+
factor: 0.5
|
| 38 |
+
patience: 10
|
| 39 |
+
epochs: 100
|
| 40 |
+
model:
|
| 41 |
+
save_dir: '../../saved_models/segpc2021_attunet'
|
| 42 |
+
load_weights: false
|
| 43 |
+
name: 'AttU_Net'
|
| 44 |
+
params:
|
| 45 |
+
img_ch: 4
|
| 46 |
+
output_ch: 2
|
| 47 |
+
# preprocess:
|
configs/segpc/segpc2021_missformer.yaml
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
run:
|
| 2 |
+
mode: 'train'
|
| 3 |
+
device: 'gpu'
|
| 4 |
+
transforms: none
|
| 5 |
+
dataset:
|
| 6 |
+
class_name: "SegPC2021Dataset"
|
| 7 |
+
input_size: 224
|
| 8 |
+
scale: 2.5
|
| 9 |
+
data_dir: "/path/to/datasets/segpc/np"
|
| 10 |
+
dataset_dir: "/path/to/datasets/segpc/TCIA_SegPC_dataset"
|
| 11 |
+
number_classes: 2
|
| 12 |
+
data_loader:
|
| 13 |
+
train:
|
| 14 |
+
batch_size: 16
|
| 15 |
+
shuffle: true
|
| 16 |
+
num_workers: 4
|
| 17 |
+
pin_memory: true
|
| 18 |
+
validation:
|
| 19 |
+
batch_size: 16
|
| 20 |
+
shuffle: false
|
| 21 |
+
num_workers: 4
|
| 22 |
+
pin_memory: true
|
| 23 |
+
test:
|
| 24 |
+
batch_size: 16
|
| 25 |
+
shuffle: false
|
| 26 |
+
num_workers: 4
|
| 27 |
+
pin_memory: false
|
| 28 |
+
training:
|
| 29 |
+
optimizer:
|
| 30 |
+
name: 'SGD'
|
| 31 |
+
params:
|
| 32 |
+
lr: 0.0001
|
| 33 |
+
momentum: 0.9
|
| 34 |
+
weight_decay: 0.0001
|
| 35 |
+
criterion:
|
| 36 |
+
name: "DiceLoss"
|
| 37 |
+
params: {}
|
| 38 |
+
scheduler:
|
| 39 |
+
factor: 0.5
|
| 40 |
+
patience: 10
|
| 41 |
+
epochs: 500
|
| 42 |
+
model:
|
| 43 |
+
save_dir: '../../saved_models/segpc2021_missformer'
|
| 44 |
+
load_weights: false
|
| 45 |
+
name: 'MISSFormer'
|
| 46 |
+
params:
|
| 47 |
+
in_ch: 4
|
| 48 |
+
num_classes: 2
|
| 49 |
+
# preprocess:
|
configs/segpc/segpc2021_multiresunet.yaml
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
run:
|
| 2 |
+
mode: 'train'
|
| 3 |
+
device: 'gpu'
|
| 4 |
+
transforms: none
|
| 5 |
+
dataset:
|
| 6 |
+
class_name: "SegPC2021Dataset"
|
| 7 |
+
input_size: 224
|
| 8 |
+
scale: 2.5
|
| 9 |
+
data_dir: "/path/to/datasets/segpc/np"
|
| 10 |
+
dataset_dir: "/path/to/datasets/segpc/TCIA_SegPC_dataset"
|
| 11 |
+
number_classes: 2
|
| 12 |
+
data_loader:
|
| 13 |
+
train:
|
| 14 |
+
batch_size: 16
|
| 15 |
+
shuffle: true
|
| 16 |
+
num_workers: 4
|
| 17 |
+
pin_memory: true
|
| 18 |
+
validation:
|
| 19 |
+
batch_size: 16
|
| 20 |
+
shuffle: false
|
| 21 |
+
num_workers: 4
|
| 22 |
+
pin_memory: true
|
| 23 |
+
test:
|
| 24 |
+
batch_size: 16
|
| 25 |
+
shuffle: false
|
| 26 |
+
num_workers: 4
|
| 27 |
+
pin_memory: false
|
| 28 |
+
training:
|
| 29 |
+
optimizer:
|
| 30 |
+
name: 'Adam'
|
| 31 |
+
params:
|
| 32 |
+
lr: 0.0001
|
| 33 |
+
# name: "SGD"
|
| 34 |
+
# params:
|
| 35 |
+
# lr: 0.0001
|
| 36 |
+
# momentum: 0.9
|
| 37 |
+
# weight_decay: 0.0001
|
| 38 |
+
criterion:
|
| 39 |
+
name: "DiceLoss"
|
| 40 |
+
params: {}
|
| 41 |
+
scheduler:
|
| 42 |
+
factor: 0.5
|
| 43 |
+
patience: 10
|
| 44 |
+
epochs: 100
|
| 45 |
+
model:
|
| 46 |
+
save_dir: '../../saved_models/segpc2021_multiresunet'
|
| 47 |
+
load_weights: false
|
| 48 |
+
name: 'MultiResUnet'
|
| 49 |
+
params:
|
| 50 |
+
channels: 4
|
| 51 |
+
filters: 32
|
| 52 |
+
nclasses: 2
|
| 53 |
+
# preprocess:
|
configs/segpc/segpc2021_resunet.yaml
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
run:
|
| 2 |
+
mode: 'train'
|
| 3 |
+
device: 'gpu'
|
| 4 |
+
transforms: none
|
| 5 |
+
dataset:
|
| 6 |
+
class_name: "SegPC2021Dataset"
|
| 7 |
+
input_size: 224
|
| 8 |
+
scale: 2.5
|
| 9 |
+
data_dir: "/path/to/datasets/segpc/np"
|
| 10 |
+
dataset_dir: "/path/to/datasets/segpc/TCIA_SegPC_dataset"
|
| 11 |
+
number_classes: 2
|
| 12 |
+
data_loader:
|
| 13 |
+
train:
|
| 14 |
+
batch_size: 16
|
| 15 |
+
shuffle: true
|
| 16 |
+
num_workers: 4
|
| 17 |
+
pin_memory: true
|
| 18 |
+
validation:
|
| 19 |
+
batch_size: 16
|
| 20 |
+
shuffle: false
|
| 21 |
+
num_workers: 4
|
| 22 |
+
pin_memory: true
|
| 23 |
+
test:
|
| 24 |
+
batch_size: 16
|
| 25 |
+
shuffle: false
|
| 26 |
+
num_workers: 4
|
| 27 |
+
pin_memory: false
|
| 28 |
+
training:
|
| 29 |
+
optimizer:
|
| 30 |
+
name: 'Adam'
|
| 31 |
+
params:
|
| 32 |
+
lr: 0.0001
|
| 33 |
+
criterion:
|
| 34 |
+
name: "DiceLoss"
|
| 35 |
+
params: {}
|
| 36 |
+
scheduler:
|
| 37 |
+
factor: 0.5
|
| 38 |
+
patience: 10
|
| 39 |
+
epochs: 100
|
| 40 |
+
model:
|
| 41 |
+
save_dir: '../../saved_models/segpc2021_resunet'
|
| 42 |
+
load_weights: false
|
| 43 |
+
name: 'ResUnet'
|
| 44 |
+
params:
|
| 45 |
+
in_ch: 4
|
| 46 |
+
out_ch: 2
|
| 47 |
+
# preprocess:
|
configs/segpc/segpc2021_transunet.yaml
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
run:
|
| 2 |
+
mode: 'train'
|
| 3 |
+
device: 'gpu'
|
| 4 |
+
transforms: none
|
| 5 |
+
dataset:
|
| 6 |
+
class_name: "SegPC2021Dataset"
|
| 7 |
+
input_size: 224
|
| 8 |
+
scale: 2.5
|
| 9 |
+
data_dir: "/path/to/datasets/segpc/np"
|
| 10 |
+
dataset_dir: "/path/to/datasets/segpc/TCIA_SegPC_dataset"
|
| 11 |
+
number_classes: 2
|
| 12 |
+
data_loader:
|
| 13 |
+
train:
|
| 14 |
+
batch_size: 16
|
| 15 |
+
shuffle: true
|
| 16 |
+
num_workers: 4
|
| 17 |
+
pin_memory: true
|
| 18 |
+
validation:
|
| 19 |
+
batch_size: 16
|
| 20 |
+
shuffle: false
|
| 21 |
+
num_workers: 4
|
| 22 |
+
pin_memory: true
|
| 23 |
+
test:
|
| 24 |
+
batch_size: 16
|
| 25 |
+
shuffle: false
|
| 26 |
+
num_workers: 4
|
| 27 |
+
pin_memory: false
|
| 28 |
+
training:
|
| 29 |
+
optimizer:
|
| 30 |
+
# name: 'Adam'
|
| 31 |
+
# params:
|
| 32 |
+
# lr: 0.0001
|
| 33 |
+
name: "SGD"
|
| 34 |
+
params:
|
| 35 |
+
lr: 0.0001
|
| 36 |
+
momentum: 0.9
|
| 37 |
+
weight_decay: 0.0001
|
| 38 |
+
criterion:
|
| 39 |
+
name: "DiceLoss"
|
| 40 |
+
params: {}
|
| 41 |
+
scheduler:
|
| 42 |
+
factor: 0.5
|
| 43 |
+
patience: 10
|
| 44 |
+
epochs: 100
|
| 45 |
+
model:
|
| 46 |
+
save_dir: '../../saved_models/segpc2021_transunet'
|
| 47 |
+
load_weights: false
|
| 48 |
+
name: 'VisionTransformer'
|
| 49 |
+
params:
|
| 50 |
+
img_size: 224
|
| 51 |
+
num_classes: 2
|
| 52 |
+
# preprocess:
|
configs/segpc/segpc2021_uctransnet.yaml
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
run:
|
| 2 |
+
mode: 'train'
|
| 3 |
+
device: 'gpu'
|
| 4 |
+
transforms: none
|
| 5 |
+
dataset:
|
| 6 |
+
class_name: "SegPC2021Dataset"
|
| 7 |
+
input_size: 224
|
| 8 |
+
scale: 2.5
|
| 9 |
+
data_dir: "/path/to/datasets/segpc/np"
|
| 10 |
+
dataset_dir: "/path/to/datasets/segpc/TCIA_SegPC_dataset"
|
| 11 |
+
number_classes: 2
|
| 12 |
+
data_loader:
|
| 13 |
+
train:
|
| 14 |
+
batch_size: 16
|
| 15 |
+
shuffle: true
|
| 16 |
+
num_workers: 4
|
| 17 |
+
pin_memory: true
|
| 18 |
+
validation:
|
| 19 |
+
batch_size: 16
|
| 20 |
+
shuffle: false
|
| 21 |
+
num_workers: 4
|
| 22 |
+
pin_memory: true
|
| 23 |
+
test:
|
| 24 |
+
batch_size: 16
|
| 25 |
+
shuffle: false
|
| 26 |
+
num_workers: 4
|
| 27 |
+
pin_memory: false
|
| 28 |
+
training:
|
| 29 |
+
optimizer:
|
| 30 |
+
name: 'Adam'
|
| 31 |
+
params:
|
| 32 |
+
lr: 0.0001
|
| 33 |
+
criterion:
|
| 34 |
+
name: "DiceLoss"
|
| 35 |
+
params: {}
|
| 36 |
+
scheduler:
|
| 37 |
+
factor: 0.5
|
| 38 |
+
patience: 10
|
| 39 |
+
epochs: 100
|
| 40 |
+
model:
|
| 41 |
+
save_dir: '../../saved_models/segpc2021_uctransnet'
|
| 42 |
+
load_weights: false
|
| 43 |
+
name: 'UCTransNet'
|
| 44 |
+
params:
|
| 45 |
+
n_channels: 4
|
| 46 |
+
n_classes: 2
|
| 47 |
+
# preprocess:
|
configs/segpc/segpc2021_unet.yaml
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
run:
|
| 2 |
+
mode: 'train'
|
| 3 |
+
device: 'gpu'
|
| 4 |
+
transforms: none
|
| 5 |
+
dataset:
|
| 6 |
+
class_name: "SegPC2021Dataset"
|
| 7 |
+
input_size: 224
|
| 8 |
+
scale: 2.5
|
| 9 |
+
data_dir: "./datasets/SegPC2021/np"
|
| 10 |
+
dataset_dir: "./datasets/SegPC2021/TCIA_SegPC_dataset"
|
| 11 |
+
number_classes: 2
|
| 12 |
+
data_loader:
|
| 13 |
+
train:
|
| 14 |
+
batch_size: 8
|
| 15 |
+
shuffle: true
|
| 16 |
+
num_workers: 0
|
| 17 |
+
pin_memory: true
|
| 18 |
+
validation:
|
| 19 |
+
batch_size: 8
|
| 20 |
+
shuffle: false
|
| 21 |
+
num_workers: 0
|
| 22 |
+
pin_memory: true
|
| 23 |
+
test:
|
| 24 |
+
batch_size: 8
|
| 25 |
+
shuffle: false
|
| 26 |
+
num_workers: 0
|
| 27 |
+
pin_memory: false
|
| 28 |
+
training:
|
| 29 |
+
optimizer:
|
| 30 |
+
name: 'Adam'
|
| 31 |
+
params:
|
| 32 |
+
lr: 0.0001
|
| 33 |
+
criterion:
|
| 34 |
+
name: "DiceLoss"
|
| 35 |
+
params: {}
|
| 36 |
+
scheduler:
|
| 37 |
+
factor: 0.5
|
| 38 |
+
patience: 10
|
| 39 |
+
epochs: 2
|
| 40 |
+
model:
|
| 41 |
+
save_dir: './saved_models/segpc2021_unet'
|
| 42 |
+
load_weights: false
|
| 43 |
+
name: 'UNet'
|
| 44 |
+
params:
|
| 45 |
+
in_channels: 4
|
| 46 |
+
out_channels: 2
|
| 47 |
+
with_bn: false
|
| 48 |
+
# preprocess:
|
configs/segpc/segpc2021_unetpp.yaml
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
run:
|
| 2 |
+
mode: 'train'
|
| 3 |
+
device: 'gpu'
|
| 4 |
+
transforms: none
|
| 5 |
+
dataset:
|
| 6 |
+
class_name: "SegPC2021Dataset"
|
| 7 |
+
input_size: 224
|
| 8 |
+
scale: 2.5
|
| 9 |
+
data_dir: "/path/to/datasets/segpc/np"
|
| 10 |
+
dataset_dir: "/path/to/datasets/segpc/TCIA_SegPC_dataset"
|
| 11 |
+
number_classes: 2
|
| 12 |
+
data_loader:
|
| 13 |
+
train:
|
| 14 |
+
batch_size: 16
|
| 15 |
+
shuffle: true
|
| 16 |
+
num_workers: 4
|
| 17 |
+
pin_memory: true
|
| 18 |
+
validation:
|
| 19 |
+
batch_size: 16
|
| 20 |
+
shuffle: false
|
| 21 |
+
num_workers: 4
|
| 22 |
+
pin_memory: true
|
| 23 |
+
test:
|
| 24 |
+
batch_size: 16
|
| 25 |
+
shuffle: false
|
| 26 |
+
num_workers: 4
|
| 27 |
+
pin_memory: false
|
| 28 |
+
training:
|
| 29 |
+
optimizer:
|
| 30 |
+
name: 'Adam'
|
| 31 |
+
params:
|
| 32 |
+
lr: 0.0001
|
| 33 |
+
criterion:
|
| 34 |
+
name: "DiceLoss"
|
| 35 |
+
params: {}
|
| 36 |
+
scheduler:
|
| 37 |
+
factor: 0.5
|
| 38 |
+
patience: 10
|
| 39 |
+
epochs: 100
|
| 40 |
+
model:
|
| 41 |
+
save_dir: '../../saved_models/segpc2021_unetpp'
|
| 42 |
+
load_weights: false
|
| 43 |
+
name: 'NestedUNet'
|
| 44 |
+
params:
|
| 45 |
+
num_classes: 2
|
| 46 |
+
input_channels: 4
|
| 47 |
+
deep_supervision: false
|
| 48 |
+
# preprocess:
|
models/__init__.py
ADDED
|
File without changes
|
models/_missformer/MISSFormer.py
ADDED
|
@@ -0,0 +1,398 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
from .segformer import *
|
| 4 |
+
from typing import Tuple
|
| 5 |
+
from einops import rearrange
|
| 6 |
+
|
| 7 |
+
class PatchExpand(nn.Module):
|
| 8 |
+
def __init__(self, input_resolution, dim, dim_scale=2, norm_layer=nn.LayerNorm):
|
| 9 |
+
super().__init__()
|
| 10 |
+
self.input_resolution = input_resolution
|
| 11 |
+
self.dim = dim
|
| 12 |
+
self.expand = nn.Linear(dim, 2*dim, bias=False) if dim_scale==2 else nn.Identity()
|
| 13 |
+
self.norm = norm_layer(dim // dim_scale)
|
| 14 |
+
|
| 15 |
+
def forward(self, x):
|
| 16 |
+
"""
|
| 17 |
+
x: B, H*W, C
|
| 18 |
+
"""
|
| 19 |
+
# print("x_shape-----",x.shape)
|
| 20 |
+
H, W = self.input_resolution
|
| 21 |
+
x = self.expand(x)
|
| 22 |
+
|
| 23 |
+
B, L, C = x.shape
|
| 24 |
+
# print(x.shape)
|
| 25 |
+
assert L == H * W, "input feature has wrong size"
|
| 26 |
+
|
| 27 |
+
x = x.view(B, H, W, C)
|
| 28 |
+
x = rearrange(x, 'b h w (p1 p2 c)-> b (h p1) (w p2) c', p1=2, p2=2, c=C//4)
|
| 29 |
+
x = x.view(B,-1,C//4)
|
| 30 |
+
x= self.norm(x.clone())
|
| 31 |
+
|
| 32 |
+
return x
|
| 33 |
+
|
| 34 |
+
class FinalPatchExpand_X4(nn.Module):
|
| 35 |
+
def __init__(self, input_resolution, dim, dim_scale=4, norm_layer=nn.LayerNorm):
|
| 36 |
+
super().__init__()
|
| 37 |
+
self.input_resolution = input_resolution
|
| 38 |
+
self.dim = dim
|
| 39 |
+
self.dim_scale = dim_scale
|
| 40 |
+
self.expand = nn.Linear(dim, 16*dim, bias=False)
|
| 41 |
+
self.output_dim = dim
|
| 42 |
+
self.norm = norm_layer(self.output_dim)
|
| 43 |
+
|
| 44 |
+
def forward(self, x):
|
| 45 |
+
"""
|
| 46 |
+
x: B, H*W, C
|
| 47 |
+
"""
|
| 48 |
+
H, W = self.input_resolution
|
| 49 |
+
x = self.expand(x)
|
| 50 |
+
B, L, C = x.shape
|
| 51 |
+
assert L == H * W, "input feature has wrong size"
|
| 52 |
+
|
| 53 |
+
x = x.view(B, H, W, C)
|
| 54 |
+
x = rearrange(x, 'b h w (p1 p2 c)-> b (h p1) (w p2) c', p1=self.dim_scale, p2=self.dim_scale, c=C//(self.dim_scale**2))
|
| 55 |
+
x = x.view(B,-1,self.output_dim)
|
| 56 |
+
x= self.norm(x.clone())
|
| 57 |
+
|
| 58 |
+
return x
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
class SegU_decoder(nn.Module):
|
| 62 |
+
def __init__(self, input_size, in_out_chan, heads, reduction_ratios, n_class=9, norm_layer=nn.LayerNorm, is_last=False):
|
| 63 |
+
super().__init__()
|
| 64 |
+
dims = in_out_chan[0]
|
| 65 |
+
out_dim = in_out_chan[1]
|
| 66 |
+
if not is_last:
|
| 67 |
+
self.concat_linear = nn.Linear(dims*2, out_dim)
|
| 68 |
+
# transformer decoder
|
| 69 |
+
self.layer_up = PatchExpand(input_resolution=input_size, dim=out_dim, dim_scale=2, norm_layer=norm_layer)
|
| 70 |
+
self.last_layer = None
|
| 71 |
+
else:
|
| 72 |
+
self.concat_linear = nn.Linear(dims*4, out_dim)
|
| 73 |
+
# transformer decoder
|
| 74 |
+
self.layer_up = FinalPatchExpand_X4(input_resolution=input_size, dim=out_dim, dim_scale=4, norm_layer=norm_layer)
|
| 75 |
+
# self.last_layer = nn.Linear(out_dim, n_class)
|
| 76 |
+
self.last_layer = nn.Conv2d(out_dim, n_class,1)
|
| 77 |
+
# self.last_layer = None
|
| 78 |
+
|
| 79 |
+
self.layer_former_1 = TransformerBlock(out_dim, heads, reduction_ratios)
|
| 80 |
+
self.layer_former_2 = TransformerBlock(out_dim, heads, reduction_ratios)
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
def init_weights(self):
|
| 84 |
+
for m in self.modules():
|
| 85 |
+
if isinstance(m, nn.Linear):
|
| 86 |
+
nn.init.xavier_uniform_(m.weight)
|
| 87 |
+
if m.bias is not None:
|
| 88 |
+
nn.init.zeros_(m.bias)
|
| 89 |
+
elif isinstance(m, nn.LayerNorm):
|
| 90 |
+
nn.init.ones_(m.weight)
|
| 91 |
+
nn.init.zeros_(m.bias)
|
| 92 |
+
elif isinstance(m, nn.Conv2d):
|
| 93 |
+
nn.init.xavier_uniform_(m.weight)
|
| 94 |
+
if m.bias is not None:
|
| 95 |
+
nn.init.zeros_(m.bias)
|
| 96 |
+
|
| 97 |
+
init_weights(self)
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
def forward(self, x1, x2=None):
|
| 102 |
+
if x2 is not None:
|
| 103 |
+
b, h, w, c = x2.shape
|
| 104 |
+
x2 = x2.view(b, -1, c)
|
| 105 |
+
# print("------",x1.shape, x2.shape)
|
| 106 |
+
cat_x = torch.cat([x1, x2], dim=-1)
|
| 107 |
+
# print("-----catx shape", cat_x.shape)
|
| 108 |
+
cat_linear_x = self.concat_linear(cat_x)
|
| 109 |
+
tran_layer_1 = self.layer_former_1(cat_linear_x, h, w)
|
| 110 |
+
tran_layer_2 = self.layer_former_2(tran_layer_1, h, w)
|
| 111 |
+
|
| 112 |
+
if self.last_layer:
|
| 113 |
+
out = self.last_layer(self.layer_up(tran_layer_2).view(b, 4*h, 4*w, -1).permute(0,3,1,2))
|
| 114 |
+
else:
|
| 115 |
+
out = self.layer_up(tran_layer_2)
|
| 116 |
+
else:
|
| 117 |
+
# if len(x1.shape)>3:
|
| 118 |
+
# x1 = x1.permute(0,2,3,1)
|
| 119 |
+
# b, h, w, c = x1.shape
|
| 120 |
+
# x1 = x1.view(b, -1, c)
|
| 121 |
+
out = self.layer_up(x1)
|
| 122 |
+
return out
|
| 123 |
+
|
| 124 |
+
|
| 125 |
+
class BridgeLayer_4(nn.Module):
|
| 126 |
+
def __init__(self, dims, head, reduction_ratios):
|
| 127 |
+
super().__init__()
|
| 128 |
+
|
| 129 |
+
self.norm1 = nn.LayerNorm(dims)
|
| 130 |
+
self.attn = M_EfficientSelfAtten(dims, head, reduction_ratios)
|
| 131 |
+
self.norm2 = nn.LayerNorm(dims)
|
| 132 |
+
self.mixffn1 = MixFFN_skip(dims,dims*4)
|
| 133 |
+
self.mixffn2 = MixFFN_skip(dims*2,dims*8)
|
| 134 |
+
self.mixffn3 = MixFFN_skip(dims*5,dims*20)
|
| 135 |
+
self.mixffn4 = MixFFN_skip(dims*8,dims*32)
|
| 136 |
+
|
| 137 |
+
|
| 138 |
+
def forward(self, inputs):
|
| 139 |
+
B = inputs[0].shape[0]
|
| 140 |
+
C = 64
|
| 141 |
+
if (type(inputs) == list):
|
| 142 |
+
# print("-----1-----")
|
| 143 |
+
c1, c2, c3, c4 = inputs
|
| 144 |
+
B, C, _, _= c1.shape
|
| 145 |
+
c1f = c1.permute(0, 2, 3, 1).reshape(B, -1, C) # 3136*64
|
| 146 |
+
c2f = c2.permute(0, 2, 3, 1).reshape(B, -1, C) # 1568*64
|
| 147 |
+
c3f = c3.permute(0, 2, 3, 1).reshape(B, -1, C) # 980*64
|
| 148 |
+
c4f = c4.permute(0, 2, 3, 1).reshape(B, -1, C) # 392*64
|
| 149 |
+
|
| 150 |
+
# print(c1f.shape, c2f.shape, c3f.shape, c4f.shape)
|
| 151 |
+
inputs = torch.cat([c1f, c2f, c3f, c4f], -2)
|
| 152 |
+
else:
|
| 153 |
+
B,_,C = inputs.shape
|
| 154 |
+
|
| 155 |
+
tx1 = inputs + self.attn(self.norm1(inputs))
|
| 156 |
+
tx = self.norm2(tx1)
|
| 157 |
+
|
| 158 |
+
|
| 159 |
+
tem1 = tx[:,:3136,:].reshape(B, -1, C)
|
| 160 |
+
tem2 = tx[:,3136:4704,:].reshape(B, -1, C*2)
|
| 161 |
+
tem3 = tx[:,4704:5684,:].reshape(B, -1, C*5)
|
| 162 |
+
tem4 = tx[:,5684:6076,:].reshape(B, -1, C*8)
|
| 163 |
+
|
| 164 |
+
m1f = self.mixffn1(tem1, 56, 56).reshape(B, -1, C)
|
| 165 |
+
m2f = self.mixffn2(tem2, 28, 28).reshape(B, -1, C)
|
| 166 |
+
m3f = self.mixffn3(tem3, 14, 14).reshape(B, -1, C)
|
| 167 |
+
m4f = self.mixffn4(tem4, 7, 7).reshape(B, -1, C)
|
| 168 |
+
|
| 169 |
+
t1 = torch.cat([m1f, m2f, m3f, m4f], -2)
|
| 170 |
+
|
| 171 |
+
tx2 = tx1 + t1
|
| 172 |
+
|
| 173 |
+
|
| 174 |
+
return tx2
|
| 175 |
+
|
| 176 |
+
|
| 177 |
+
class BridgeLayer_3(nn.Module):
|
| 178 |
+
def __init__(self, dims, head, reduction_ratios):
|
| 179 |
+
super().__init__()
|
| 180 |
+
|
| 181 |
+
self.norm1 = nn.LayerNorm(dims)
|
| 182 |
+
self.attn = M_EfficientSelfAtten(dims, head, reduction_ratios)
|
| 183 |
+
self.norm2 = nn.LayerNorm(dims)
|
| 184 |
+
# self.mixffn1 = MixFFN(dims,dims*4)
|
| 185 |
+
self.mixffn2 = MixFFN(dims*2,dims*8)
|
| 186 |
+
self.mixffn3 = MixFFN(dims*5,dims*20)
|
| 187 |
+
self.mixffn4 = MixFFN(dims*8,dims*32)
|
| 188 |
+
|
| 189 |
+
|
| 190 |
+
def forward(self, inputs: Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]) -> torch.Tensor:
|
| 191 |
+
B = inputs[0].shape[0]
|
| 192 |
+
C = 64
|
| 193 |
+
if (type(inputs) == list):
|
| 194 |
+
# print("-----1-----")
|
| 195 |
+
c1, c2, c3, c4 = inputs
|
| 196 |
+
B, C, _, _= c1.shape
|
| 197 |
+
c1f = c1.permute(0, 2, 3, 1).reshape(B, -1, C) # 3136*64
|
| 198 |
+
c2f = c2.permute(0, 2, 3, 1).reshape(B, -1, C) # 1568*64
|
| 199 |
+
c3f = c3.permute(0, 2, 3, 1).reshape(B, -1, C) # 980*64
|
| 200 |
+
c4f = c4.permute(0, 2, 3, 1).reshape(B, -1, C) # 392*64
|
| 201 |
+
|
| 202 |
+
# print(c1f.shape, c2f.shape, c3f.shape, c4f.shape)
|
| 203 |
+
inputs = torch.cat([c2f, c3f, c4f], -2)
|
| 204 |
+
else:
|
| 205 |
+
B,_,C = inputs.shape
|
| 206 |
+
|
| 207 |
+
tx1 = inputs + self.attn(self.norm1(inputs))
|
| 208 |
+
tx = self.norm2(tx1)
|
| 209 |
+
|
| 210 |
+
|
| 211 |
+
# tem1 = tx[:,:3136,:].reshape(B, -1, C)
|
| 212 |
+
tem2 = tx[:,:1568,:].reshape(B, -1, C*2)
|
| 213 |
+
tem3 = tx[:,1568:2548,:].reshape(B, -1, C*5)
|
| 214 |
+
tem4 = tx[:,2548:2940,:].reshape(B, -1, C*8)
|
| 215 |
+
|
| 216 |
+
# m1f = self.mixffn1(tem1, 56, 56).reshape(B, -1, C)
|
| 217 |
+
m2f = self.mixffn2(tem2, 28, 28).reshape(B, -1, C)
|
| 218 |
+
m3f = self.mixffn3(tem3, 14, 14).reshape(B, -1, C)
|
| 219 |
+
m4f = self.mixffn4(tem4, 7, 7).reshape(B, -1, C)
|
| 220 |
+
|
| 221 |
+
t1 = torch.cat([m2f, m3f, m4f], -2)
|
| 222 |
+
|
| 223 |
+
tx2 = tx1 + t1
|
| 224 |
+
|
| 225 |
+
|
| 226 |
+
return tx2
|
| 227 |
+
|
| 228 |
+
|
| 229 |
+
|
| 230 |
+
class BridegeBlock_4(nn.Module):
|
| 231 |
+
def __init__(self, dims, head, reduction_ratios):
|
| 232 |
+
super().__init__()
|
| 233 |
+
self.bridge_layer1 = BridgeLayer_4(dims, head, reduction_ratios)
|
| 234 |
+
self.bridge_layer2 = BridgeLayer_4(dims, head, reduction_ratios)
|
| 235 |
+
self.bridge_layer3 = BridgeLayer_4(dims, head, reduction_ratios)
|
| 236 |
+
self.bridge_layer4 = BridgeLayer_4(dims, head, reduction_ratios)
|
| 237 |
+
|
| 238 |
+
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
| 239 |
+
bridge1 = self.bridge_layer1(x)
|
| 240 |
+
bridge2 = self.bridge_layer2(bridge1)
|
| 241 |
+
bridge3 = self.bridge_layer3(bridge2)
|
| 242 |
+
bridge4 = self.bridge_layer4(bridge3)
|
| 243 |
+
|
| 244 |
+
B,_,C = bridge4.shape
|
| 245 |
+
outs = []
|
| 246 |
+
|
| 247 |
+
sk1 = bridge4[:,:3136,:].reshape(B, 56, 56, C).permute(0,3,1,2)
|
| 248 |
+
sk2 = bridge4[:,3136:4704,:].reshape(B, 28, 28, C*2).permute(0,3,1,2)
|
| 249 |
+
sk3 = bridge4[:,4704:5684,:].reshape(B, 14, 14, C*5).permute(0,3,1,2)
|
| 250 |
+
sk4 = bridge4[:,5684:6076,:].reshape(B, 7, 7, C*8).permute(0,3,1,2)
|
| 251 |
+
|
| 252 |
+
outs.append(sk1)
|
| 253 |
+
outs.append(sk2)
|
| 254 |
+
outs.append(sk3)
|
| 255 |
+
outs.append(sk4)
|
| 256 |
+
|
| 257 |
+
return outs
|
| 258 |
+
|
| 259 |
+
|
| 260 |
+
class BridegeBlock_3(nn.Module):
|
| 261 |
+
def __init__(self, dims, head, reduction_ratios):
|
| 262 |
+
super().__init__()
|
| 263 |
+
self.bridge_layer1 = BridgeLayer_3(dims, head, reduction_ratios)
|
| 264 |
+
self.bridge_layer2 = BridgeLayer_3(dims, head, reduction_ratios)
|
| 265 |
+
self.bridge_layer3 = BridgeLayer_3(dims, head, reduction_ratios)
|
| 266 |
+
self.bridge_layer4 = BridgeLayer_3(dims, head, reduction_ratios)
|
| 267 |
+
|
| 268 |
+
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
| 269 |
+
outs = []
|
| 270 |
+
if (type(x) == list):
|
| 271 |
+
# print("-----1-----")
|
| 272 |
+
outs.append(x[0])
|
| 273 |
+
bridge1 = self.bridge_layer1(x)
|
| 274 |
+
bridge2 = self.bridge_layer2(bridge1)
|
| 275 |
+
bridge3 = self.bridge_layer3(bridge2)
|
| 276 |
+
bridge4 = self.bridge_layer4(bridge3)
|
| 277 |
+
|
| 278 |
+
B,_,C = bridge4.shape
|
| 279 |
+
|
| 280 |
+
|
| 281 |
+
# sk1 = bridge2[:,:3136,:].reshape(B, 56, 56, C).permute(0,3,1,2)
|
| 282 |
+
sk2 = bridge4[:,:1568,:].reshape(B, 28, 28, C*2).permute(0,3,1,2)
|
| 283 |
+
sk3 = bridge4[:,1568:2548,:].reshape(B, 14, 14, C*5).permute(0,3,1,2)
|
| 284 |
+
sk4 = bridge4[:,2548:2940,:].reshape(B, 7, 7, C*8).permute(0,3,1,2)
|
| 285 |
+
|
| 286 |
+
# outs.append(sk1)
|
| 287 |
+
outs.append(sk2)
|
| 288 |
+
outs.append(sk3)
|
| 289 |
+
outs.append(sk4)
|
| 290 |
+
|
| 291 |
+
return outs
|
| 292 |
+
|
| 293 |
+
|
| 294 |
+
class MyDecoderLayer(nn.Module):
|
| 295 |
+
def __init__(self, input_size, in_out_chan, heads, reduction_ratios,token_mlp_mode, n_class=9, norm_layer=nn.LayerNorm, is_last=False):
|
| 296 |
+
super().__init__()
|
| 297 |
+
dims = in_out_chan[0]
|
| 298 |
+
out_dim = in_out_chan[1]
|
| 299 |
+
if not is_last:
|
| 300 |
+
self.concat_linear = nn.Linear(dims*2, out_dim)
|
| 301 |
+
# transformer decoder
|
| 302 |
+
self.layer_up = PatchExpand(input_resolution=input_size, dim=out_dim, dim_scale=2, norm_layer=norm_layer)
|
| 303 |
+
self.last_layer = None
|
| 304 |
+
else:
|
| 305 |
+
self.concat_linear = nn.Linear(dims*4, out_dim)
|
| 306 |
+
# transformer decoder
|
| 307 |
+
self.layer_up = FinalPatchExpand_X4(input_resolution=input_size, dim=out_dim, dim_scale=4, norm_layer=norm_layer)
|
| 308 |
+
# self.last_layer = nn.Linear(out_dim, n_class)
|
| 309 |
+
self.last_layer = nn.Conv2d(out_dim, n_class,1)
|
| 310 |
+
# self.last_layer = None
|
| 311 |
+
|
| 312 |
+
self.layer_former_1 = TransformerBlock(out_dim, heads, reduction_ratios, token_mlp_mode)
|
| 313 |
+
self.layer_former_2 = TransformerBlock(out_dim, heads, reduction_ratios, token_mlp_mode)
|
| 314 |
+
|
| 315 |
+
|
| 316 |
+
def init_weights(self):
|
| 317 |
+
for m in self.modules():
|
| 318 |
+
if isinstance(m, nn.Linear):
|
| 319 |
+
nn.init.xavier_uniform_(m.weight)
|
| 320 |
+
if m.bias is not None:
|
| 321 |
+
nn.init.zeros_(m.bias)
|
| 322 |
+
elif isinstance(m, nn.LayerNorm):
|
| 323 |
+
nn.init.ones_(m.weight)
|
| 324 |
+
nn.init.zeros_(m.bias)
|
| 325 |
+
elif isinstance(m, nn.Conv2d):
|
| 326 |
+
nn.init.xavier_uniform_(m.weight)
|
| 327 |
+
if m.bias is not None:
|
| 328 |
+
nn.init.zeros_(m.bias)
|
| 329 |
+
|
| 330 |
+
init_weights(self)
|
| 331 |
+
|
| 332 |
+
def forward(self, x1, x2=None):
|
| 333 |
+
if x2 is not None:
|
| 334 |
+
b, h, w, c = x2.shape
|
| 335 |
+
x2 = x2.view(b, -1, c)
|
| 336 |
+
# print("------",x1.shape, x2.shape)
|
| 337 |
+
cat_x = torch.cat([x1, x2], dim=-1)
|
| 338 |
+
# print("-----catx shape", cat_x.shape)
|
| 339 |
+
cat_linear_x = self.concat_linear(cat_x)
|
| 340 |
+
tran_layer_1 = self.layer_former_1(cat_linear_x, h, w)
|
| 341 |
+
tran_layer_2 = self.layer_former_2(tran_layer_1, h, w)
|
| 342 |
+
|
| 343 |
+
if self.last_layer:
|
| 344 |
+
out = self.last_layer(self.layer_up(tran_layer_2).view(b, 4*h, 4*w, -1).permute(0,3,1,2))
|
| 345 |
+
else:
|
| 346 |
+
out = self.layer_up(tran_layer_2)
|
| 347 |
+
else:
|
| 348 |
+
# if len(x1.shape)>3:
|
| 349 |
+
# x1 = x1.permute(0,2,3,1)
|
| 350 |
+
# b, h, w, c = x1.shape
|
| 351 |
+
# x1 = x1.view(b, -1, c)
|
| 352 |
+
out = self.layer_up(x1)
|
| 353 |
+
return out
|
| 354 |
+
|
| 355 |
+
class MISSFormer(nn.Module):
|
| 356 |
+
def __init__(self, num_classes=9, in_ch=3, token_mlp_mode="mix_skip", encoder_pretrained=True):
|
| 357 |
+
super().__init__()
|
| 358 |
+
|
| 359 |
+
reduction_ratios = [8, 4, 2, 1]
|
| 360 |
+
heads = [1, 2, 5, 8]
|
| 361 |
+
d_base_feat_size = 7 #16 for 512 inputsize 7for 224
|
| 362 |
+
in_out_chan = [[32, 64],[144, 128],[288, 320],[512, 512]]
|
| 363 |
+
|
| 364 |
+
dims, layers = [[64, 128, 320, 512], [2, 2, 2, 2]]
|
| 365 |
+
self.backbone = MiT(224, dims, layers,in_ch, token_mlp_mode)
|
| 366 |
+
|
| 367 |
+
self.reduction_ratios = [1, 2, 4, 8]
|
| 368 |
+
self.bridge = BridegeBlock_4(64, 1, self.reduction_ratios)
|
| 369 |
+
|
| 370 |
+
self.decoder_3= MyDecoderLayer((d_base_feat_size,d_base_feat_size), in_out_chan[3], heads[3], reduction_ratios[3],token_mlp_mode, n_class=num_classes)
|
| 371 |
+
self.decoder_2= MyDecoderLayer((d_base_feat_size*2,d_base_feat_size*2),in_out_chan[2], heads[2], reduction_ratios[2], token_mlp_mode, n_class=num_classes)
|
| 372 |
+
self.decoder_1= MyDecoderLayer((d_base_feat_size*4,d_base_feat_size*4), in_out_chan[1], heads[1], reduction_ratios[1], token_mlp_mode, n_class=num_classes)
|
| 373 |
+
self.decoder_0= MyDecoderLayer((d_base_feat_size*8,d_base_feat_size*8), in_out_chan[0], heads[0], reduction_ratios[0], token_mlp_mode, n_class=num_classes, is_last=True)
|
| 374 |
+
|
| 375 |
+
|
| 376 |
+
def forward(self, x):
|
| 377 |
+
#---------------Encoder-------------------------
|
| 378 |
+
if x.size()[1] == 1:
|
| 379 |
+
x = x.repeat(1,3,1,1)
|
| 380 |
+
|
| 381 |
+
encoder = self.backbone(x)
|
| 382 |
+
bridge = self.bridge(encoder) #list
|
| 383 |
+
|
| 384 |
+
b,c,_,_ = bridge[3].shape
|
| 385 |
+
# print(bridge[3].shape, bridge[2].shape,bridge[1].shape, bridge[0].shape)
|
| 386 |
+
#---------------Decoder-------------------------
|
| 387 |
+
# print("stage3-----")
|
| 388 |
+
tmp_3 = self.decoder_3(bridge[3].permute(0,2,3,1).view(b,-1,c))
|
| 389 |
+
# print("stage2-----")
|
| 390 |
+
tmp_2 = self.decoder_2(tmp_3, bridge[2].permute(0,2,3,1))
|
| 391 |
+
# print("stage1-----")
|
| 392 |
+
tmp_1 = self.decoder_1(tmp_2, bridge[1].permute(0,2,3,1))
|
| 393 |
+
# print("stage0-----")
|
| 394 |
+
tmp_0 = self.decoder_0(tmp_1, bridge[0].permute(0,2,3,1))
|
| 395 |
+
|
| 396 |
+
return tmp_0
|
| 397 |
+
|
| 398 |
+
|
models/_missformer/__init__.py
ADDED
|
File without changes
|
models/_missformer/segformer.py
ADDED
|
@@ -0,0 +1,557 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
from torch import nn
|
| 3 |
+
from torch.nn import functional as F
|
| 4 |
+
from typing import Tuple
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
class EfficientSelfAtten(nn.Module):
|
| 8 |
+
def __init__(self, dim, head, reduction_ratio):
|
| 9 |
+
super().__init__()
|
| 10 |
+
self.head = head
|
| 11 |
+
self.reduction_ratio = reduction_ratio
|
| 12 |
+
self.scale = (dim // head) ** -0.5
|
| 13 |
+
self.q = nn.Linear(dim, dim, bias=True)
|
| 14 |
+
self.kv = nn.Linear(dim, dim*2, bias=True)
|
| 15 |
+
self.proj = nn.Linear(dim, dim)
|
| 16 |
+
|
| 17 |
+
if reduction_ratio > 1:
|
| 18 |
+
self.sr = nn.Conv2d(dim, dim, reduction_ratio, reduction_ratio)
|
| 19 |
+
self.norm = nn.LayerNorm(dim)
|
| 20 |
+
|
| 21 |
+
def forward(self, x: torch.Tensor, H, W) -> torch.Tensor:
|
| 22 |
+
B, N, C = x.shape
|
| 23 |
+
q = self.q(x).reshape(B, N, self.head, C // self.head).permute(0, 2, 1, 3)
|
| 24 |
+
|
| 25 |
+
if self.reduction_ratio > 1:
|
| 26 |
+
p_x = x.clone().permute(0, 2, 1).reshape(B, C, H, W)
|
| 27 |
+
sp_x = self.sr(p_x).reshape(B, C, -1).permute(0, 2, 1)
|
| 28 |
+
x = self.norm(sp_x)
|
| 29 |
+
|
| 30 |
+
kv = self.kv(x).reshape(B, -1, 2, self.head, C // self.head).permute(2, 0, 3, 1, 4)
|
| 31 |
+
k, v = kv[0], kv[1]
|
| 32 |
+
|
| 33 |
+
attn = (q @ k.transpose(-2, -1)) * self.scale
|
| 34 |
+
attn_score = attn.softmax(dim=-1)
|
| 35 |
+
|
| 36 |
+
x_atten = (attn_score @ v).transpose(1, 2).reshape(B, N, C)
|
| 37 |
+
out = self.proj(x_atten)
|
| 38 |
+
|
| 39 |
+
return out
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
class SelfAtten(nn.Module):
|
| 43 |
+
def __init__(self, dim, head):
|
| 44 |
+
super().__init__()
|
| 45 |
+
self.head = head
|
| 46 |
+
self.scale = (dim // head) ** -0.5
|
| 47 |
+
self.q = nn.Linear(dim, dim, bias=True)
|
| 48 |
+
self.kv = nn.Linear(dim, dim*2, bias=True)
|
| 49 |
+
self.proj = nn.Linear(dim, dim)
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
| 53 |
+
B, N, C = x.shape
|
| 54 |
+
q = self.q(x).reshape(B, N, self.head, C // self.head).permute(0, 2, 1, 3)
|
| 55 |
+
|
| 56 |
+
kv = self.kv(x).reshape(B, -1, 2, self.head, C // self.head).permute(2, 0, 3, 1, 4)
|
| 57 |
+
k, v = kv[0], kv[1]
|
| 58 |
+
|
| 59 |
+
attn = (q @ k.transpose(-2, -1)) * self.scale
|
| 60 |
+
attn_score = attn.softmax(dim=-1)
|
| 61 |
+
|
| 62 |
+
x_atten = (attn_score @ v).transpose(1, 2).reshape(B, N, C)
|
| 63 |
+
out = self.proj(x_atten)
|
| 64 |
+
|
| 65 |
+
return out
|
| 66 |
+
|
| 67 |
+
class Scale_reduce(nn.Module):
|
| 68 |
+
def __init__(self, dim, reduction_ratio):
|
| 69 |
+
super().__init__()
|
| 70 |
+
self.dim = dim
|
| 71 |
+
self.reduction_ratio = reduction_ratio
|
| 72 |
+
if(len(self.reduction_ratio)==4):
|
| 73 |
+
self.sr0 = nn.Conv2d(dim, dim, reduction_ratio[3], reduction_ratio[3])
|
| 74 |
+
self.sr1 = nn.Conv2d(dim*2, dim*2, reduction_ratio[2], reduction_ratio[2])
|
| 75 |
+
self.sr2 = nn.Conv2d(dim*5, dim*5, reduction_ratio[1], reduction_ratio[1])
|
| 76 |
+
|
| 77 |
+
elif(len(self.reduction_ratio)==3):
|
| 78 |
+
self.sr0 = nn.Conv2d(dim*2, dim*2, reduction_ratio[2], reduction_ratio[2])
|
| 79 |
+
self.sr1 = nn.Conv2d(dim*5, dim*5, reduction_ratio[1], reduction_ratio[1])
|
| 80 |
+
|
| 81 |
+
self.norm = nn.LayerNorm(dim)
|
| 82 |
+
|
| 83 |
+
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
| 84 |
+
B, N, C = x.shape
|
| 85 |
+
if(len(self.reduction_ratio)==4):
|
| 86 |
+
tem0 = x[:,:3136,:].reshape(B, 56, 56, C).permute(0, 3, 1, 2)
|
| 87 |
+
tem1 = x[:,3136:4704,:].reshape(B, 28, 28, C*2).permute(0, 3, 1, 2)
|
| 88 |
+
tem2 = x[:,4704:5684,:].reshape(B, 14, 14, C*5).permute(0, 3, 1, 2)
|
| 89 |
+
tem3 = x[:,5684:6076,:]
|
| 90 |
+
|
| 91 |
+
sr_0 = self.sr0(tem0).reshape(B, C, -1).permute(0, 2, 1)
|
| 92 |
+
sr_1 = self.sr1(tem1).reshape(B, C, -1).permute(0, 2, 1)
|
| 93 |
+
sr_2 = self.sr2(tem2).reshape(B, C, -1).permute(0, 2, 1)
|
| 94 |
+
|
| 95 |
+
reduce_out = self.norm(torch.cat([sr_0, sr_1, sr_2, tem3], -2))
|
| 96 |
+
|
| 97 |
+
if(len(self.reduction_ratio)==3):
|
| 98 |
+
tem0 = x[:,:1568,:].reshape(B, 28, 28, C*2).permute(0, 3, 1, 2)
|
| 99 |
+
tem1 = x[:,1568:2548,:].reshape(B, 14, 14, C*5).permute(0, 3, 1, 2)
|
| 100 |
+
tem2 = x[:,2548:2940,:]
|
| 101 |
+
|
| 102 |
+
sr_0 = self.sr0(tem0).reshape(B, C, -1).permute(0, 2, 1)
|
| 103 |
+
sr_1 = self.sr1(tem1).reshape(B, C, -1).permute(0, 2, 1)
|
| 104 |
+
|
| 105 |
+
reduce_out = self.norm(torch.cat([sr_0, sr_1, tem2], -2))
|
| 106 |
+
|
| 107 |
+
return reduce_out
|
| 108 |
+
|
| 109 |
+
|
| 110 |
+
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
class M_EfficientSelfAtten(nn.Module):
|
| 114 |
+
def __init__(self, dim, head, reduction_ratio):
|
| 115 |
+
super().__init__()
|
| 116 |
+
self.head = head
|
| 117 |
+
self.reduction_ratio = reduction_ratio # list[1 2 4 8]
|
| 118 |
+
self.scale = (dim // head) ** -0.5
|
| 119 |
+
self.q = nn.Linear(dim, dim, bias=True)
|
| 120 |
+
self.kv = nn.Linear(dim, dim*2, bias=True)
|
| 121 |
+
self.proj = nn.Linear(dim, dim)
|
| 122 |
+
|
| 123 |
+
if reduction_ratio is not None:
|
| 124 |
+
self.scale_reduce = Scale_reduce(dim,reduction_ratio)
|
| 125 |
+
|
| 126 |
+
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
| 127 |
+
B, N, C = x.shape
|
| 128 |
+
q = self.q(x).reshape(B, N, self.head, C // self.head).permute(0, 2, 1, 3)
|
| 129 |
+
|
| 130 |
+
if self.reduction_ratio is not None:
|
| 131 |
+
x = self.scale_reduce(x)
|
| 132 |
+
|
| 133 |
+
kv = self.kv(x).reshape(B, -1, 2, self.head, C // self.head).permute(2, 0, 3, 1, 4)
|
| 134 |
+
k, v = kv[0], kv[1]
|
| 135 |
+
|
| 136 |
+
attn = (q @ k.transpose(-2, -1)) * self.scale
|
| 137 |
+
attn_score = attn.softmax(dim=-1)
|
| 138 |
+
|
| 139 |
+
x_atten = (attn_score @ v).transpose(1, 2).reshape(B, N, C)
|
| 140 |
+
out = self.proj(x_atten)
|
| 141 |
+
|
| 142 |
+
|
| 143 |
+
return out
|
| 144 |
+
|
| 145 |
+
|
| 146 |
+
class LocalEnhance_EfficientSelfAtten(nn.Module):
|
| 147 |
+
def __init__(self, dim, head, reduction_ratio):
|
| 148 |
+
super().__init__()
|
| 149 |
+
self.head = head
|
| 150 |
+
self.reduction_ratio = reduction_ratio
|
| 151 |
+
self.scale = (dim // head) ** -0.5
|
| 152 |
+
self.q = nn.Linear(dim, dim, bias=True)
|
| 153 |
+
self.kv = nn.Linear(dim, dim*2, bias=True)
|
| 154 |
+
self.proj = nn.Linear(dim, dim)
|
| 155 |
+
self.local_pos = DWConv(dim)
|
| 156 |
+
|
| 157 |
+
if reduction_ratio > 1:
|
| 158 |
+
self.sr = nn.Conv2d(dim, dim, reduction_ratio, reduction_ratio)
|
| 159 |
+
self.norm = nn.LayerNorm(dim)
|
| 160 |
+
|
| 161 |
+
def forward(self, x: torch.Tensor, H, W) -> torch.Tensor:
|
| 162 |
+
B, N, C = x.shape
|
| 163 |
+
q = self.q(x).reshape(B, N, self.head, C // self.head).permute(0, 2, 1, 3)
|
| 164 |
+
|
| 165 |
+
if self.reduction_ratio > 1:
|
| 166 |
+
p_x = x.clone().permute(0, 2, 1).reshape(B, C, H, W)
|
| 167 |
+
sp_x = self.sr(p_x).reshape(B, C, -1).permute(0, 2, 1)
|
| 168 |
+
x = self.norm(sp_x)
|
| 169 |
+
|
| 170 |
+
kv = self.kv(x).reshape(B, -1, 2, self.head, C // self.head).permute(2, 0, 3, 1, 4)
|
| 171 |
+
k, v = kv[0], kv[1]
|
| 172 |
+
|
| 173 |
+
attn = (q @ k.transpose(-2, -1)) * self.scale
|
| 174 |
+
attn_score = attn.softmax(dim=-1)
|
| 175 |
+
local_v = v.permute(0, 2, 1, 3).reshape(B, N, C)
|
| 176 |
+
local_pos = self.local_pos(local_v).reshape(B, -1, self.head, C//self.head).permute(0, 2, 1, 3)
|
| 177 |
+
x_atten = ((attn_score @ v) + local_pos).transpose(1, 2).reshape(B, N, C)
|
| 178 |
+
out = self.proj(x_atten)
|
| 179 |
+
|
| 180 |
+
return out
|
| 181 |
+
|
| 182 |
+
|
| 183 |
+
class DWConv(nn.Module):
    """Depth-wise 3x3 convolution applied to a flattened token sequence."""

    def __init__(self, dim):
        super().__init__()
        # groups=dim makes the 3x3 convolution depth-wise: one filter per channel.
        self.dwconv = nn.Conv2d(dim, dim, 3, 1, 1, groups=dim)

    def forward(self, x: torch.Tensor, H, W) -> torch.Tensor:
        """Reshape (B, N, C) tokens to a (B, C, H, W) map, convolve, flatten back."""
        B, N, C = x.shape
        as_map = x.transpose(1, 2).view(B, C, H, W)
        return self.dwconv(as_map).flatten(2).transpose(1, 2)
|
| 193 |
+
|
| 194 |
+
|
| 195 |
+
class MixFFN(nn.Module):
    """SegFormer feed-forward block: Linear -> depth-wise conv -> GELU -> Linear."""

    def __init__(self, c1, c2):
        super().__init__()
        self.fc1 = nn.Linear(c1, c2)
        self.dwconv = DWConv(c2)
        self.act = nn.GELU()
        self.fc2 = nn.Linear(c2, c1)

    def forward(self, x: torch.Tensor, H, W) -> torch.Tensor:
        hidden = self.fc1(x)
        hidden = self.dwconv(hidden, H, W)
        return self.fc2(self.act(hidden))
|
| 207 |
+
|
| 208 |
+
class MixFFN_skip(nn.Module):
    """MixFFN variant with a skip connection around the depth-wise conv.

    Computes fc2(GELU(LayerNorm(DWConv(fc1(x)) + fc1(x)))).
    """

    def __init__(self, c1, c2):
        super().__init__()
        self.fc1 = nn.Linear(c1, c2)
        self.dwconv = DWConv(c2)
        self.act = nn.GELU()
        self.fc2 = nn.Linear(c2, c1)
        self.norm1 = nn.LayerNorm(c2)
        # norm2/norm3 are never used in forward; kept so existing checkpoints
        # (whose state_dicts contain their parameters) still load strictly.
        self.norm2 = nn.LayerNorm(c2)
        self.norm3 = nn.LayerNorm(c2)

    def forward(self, x: torch.Tensor, H, W) -> torch.Tensor:
        # Fix: the original evaluated self.fc1(x) twice per call; hoisting it
        # halves the fc1 work with identical results.
        hidden = self.fc1(x)
        ax = self.act(self.norm1(self.dwconv(hidden, H, W) + hidden))
        return self.fc2(ax)
|
| 222 |
+
|
| 223 |
+
class MLP_FFN(nn.Module):
    """Plain two-layer MLP with a GELU in between (no depth-wise conv)."""

    def __init__(self, c1, c2):
        super().__init__()
        self.fc1 = nn.Linear(c1, c2)
        self.act = nn.GELU()
        self.fc2 = nn.Linear(c2, c1)

    def forward(self, x):
        return self.fc2(self.act(self.fc1(x)))
|
| 235 |
+
|
| 236 |
+
class MixD_FFN(nn.Module):
    """MixFFN variant that fuses the conv branch with the linear branch.

    fuse_mode "add" sums the two branches; any other value concatenates them
    on the channel axis (fc2 is sized accordingly in __init__).
    """

    def __init__(self, c1, c2, fuse_mode="add"):
        super().__init__()
        self.fc1 = nn.Linear(c1, c2)
        self.dwconv = DWConv(c2)
        self.act = nn.GELU()
        self.fc2 = nn.Linear(c2, c1) if fuse_mode == "add" else nn.Linear(c2*2, c1)
        self.fuse_mode = fuse_mode

    def forward(self, x, H, W):
        # Fixes two bugs in the original:
        # 1) forward(self, x) referenced undefined H/W (NameError on every
        #    call) even though FuseTransformerBlock invokes mlp(x, H, W);
        #    the signature now accepts them.
        # 2) it returned self.fc2(ax), discarding the fused activation it had
        #    just computed; the fused tensor is now fed to fc2.
        hidden = self.fc1(x)  # hoisted: was computed twice per call
        ax = self.dwconv(hidden, H, W)
        if self.fuse_mode == "add":
            fuse = self.act(ax + hidden)
        else:
            fuse = self.act(torch.cat([ax, hidden], 2))
        return self.fc2(fuse)
|
| 250 |
+
|
| 251 |
+
|
| 252 |
+
class OverlapPatchEmbeddings(nn.Module):
|
| 253 |
+
def __init__(self, img_size=224, patch_size=7, stride=4, padding=1, in_ch=3, dim=768):
|
| 254 |
+
super().__init__()
|
| 255 |
+
self.num_patches = (img_size // patch_size) ** 2
|
| 256 |
+
self.proj = nn.Conv2d(in_ch, dim, patch_size, stride, padding)
|
| 257 |
+
self.norm = nn.LayerNorm(dim)
|
| 258 |
+
|
| 259 |
+
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
| 260 |
+
px = self.proj(x)
|
| 261 |
+
_, _, H, W = px.shape
|
| 262 |
+
fx = px.flatten(2).transpose(1, 2)
|
| 263 |
+
nfx = self.norm(fx)
|
| 264 |
+
return nfx, H, W
|
| 265 |
+
|
| 266 |
+
|
| 267 |
+
|
| 268 |
+
class TransformerBlock(nn.Module):
    """Pre-norm transformer block: efficient self-attention + token MLP."""

    def __init__(self, dim, head, reduction_ratio=1, token_mlp='mix'):
        super().__init__()
        self.norm1 = nn.LayerNorm(dim)
        self.attn = EfficientSelfAtten(dim, head, reduction_ratio)
        self.norm2 = nn.LayerNorm(dim)
        # Select the feed-forward flavour; hidden width is 4x dim.
        if token_mlp == 'mix':
            self.mlp = MixFFN(dim, int(dim*4))
        elif token_mlp == 'mix_skip':
            self.mlp = MixFFN_skip(dim, int(dim*4))
        else:
            self.mlp = MLP_FFN(dim, int(dim*4))

    def forward(self, x: torch.Tensor, H, W) -> torch.Tensor:
        x = x + self.attn(self.norm1(x), H, W)
        x = x + self.mlp(self.norm2(x), H, W)
        return x
|
| 285 |
+
|
| 286 |
+
|
| 287 |
+
class FuseTransformerBlock(nn.Module):
    """Pre-norm transformer block whose MLP is the fusing MixD_FFN."""

    def __init__(self, dim, head, reduction_ratio=1, fuse_mode="add"):
        super().__init__()
        self.norm1 = nn.LayerNorm(dim)
        self.attn = EfficientSelfAtten(dim, head, reduction_ratio)
        self.norm2 = nn.LayerNorm(dim)
        self.mlp = MixD_FFN(dim, int(dim*4), fuse_mode)

    def forward(self, x: torch.Tensor, H, W) -> torch.Tensor:
        x = x + self.attn(self.norm1(x), H, W)
        x = x + self.mlp(self.norm2(x), H, W)
        return x
|
| 299 |
+
|
| 300 |
+
|
| 301 |
+
class MLP(nn.Module):
    """Flattens a (B, C, H, W) map to tokens and projects C -> embed_dim."""

    def __init__(self, dim, embed_dim):
        super().__init__()
        self.proj = nn.Linear(dim, embed_dim)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        tokens = x.flatten(2).transpose(1, 2)  # (B, H*W, C)
        return self.proj(tokens)
|
| 309 |
+
|
| 310 |
+
|
| 311 |
+
class ConvModule(nn.Module):
    """Conv2d (no bias) -> BatchNorm -> ReLU."""

    def __init__(self, c1, c2, k):
        super().__init__()
        self.conv = nn.Conv2d(c1, c2, k, bias=False)  # bias is absorbed by BN
        self.bn = nn.BatchNorm2d(c2)
        self.activate = nn.ReLU(True)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        out = self.conv(x)
        out = self.bn(out)
        return self.activate(out)
|
| 320 |
+
|
| 321 |
+
|
| 322 |
+
class MiT(nn.Module):
    """Mix Transformer (SegFormer) encoder producing a 4-level feature pyramid.

    Stage i halves the resolution of stage i-1; outputs are feature maps at
    1/4, 1/8, 1/16 and 1/32 of the input resolution.
    """

    def __init__(self, image_size, dims, layers, in_ch=3, token_mlp='mix_skip'):
        super().__init__()
        patch_sizes = [7, 3, 3, 3]
        strides = [4, 2, 2, 2]
        padding_sizes = [3, 1, 1, 1]
        reduction_ratios = [8, 4, 2, 1]
        heads = [1, 2, 5, 8]

        # Patch embeddings, one per stage.
        self.patch_embed1 = OverlapPatchEmbeddings(image_size, patch_sizes[0], strides[0], padding_sizes[0], in_ch, dims[0])
        self.patch_embed2 = OverlapPatchEmbeddings(image_size//4, patch_sizes[1], strides[1], padding_sizes[1], dims[0], dims[1])
        self.patch_embed3 = OverlapPatchEmbeddings(image_size//8, patch_sizes[2], strides[2], padding_sizes[2], dims[1], dims[2])
        self.patch_embed4 = OverlapPatchEmbeddings(image_size//16, patch_sizes[3], strides[3], padding_sizes[3], dims[2], dims[3])

        # Transformer stages, each followed by a LayerNorm.
        self.block1 = nn.ModuleList([
            TransformerBlock(dims[0], heads[0], reduction_ratios[0], token_mlp)
            for _ in range(layers[0])])
        self.norm1 = nn.LayerNorm(dims[0])

        self.block2 = nn.ModuleList([
            TransformerBlock(dims[1], heads[1], reduction_ratios[1], token_mlp)
            for _ in range(layers[1])])
        self.norm2 = nn.LayerNorm(dims[1])

        self.block3 = nn.ModuleList([
            TransformerBlock(dims[2], heads[2], reduction_ratios[2], token_mlp)
            for _ in range(layers[2])])
        self.norm3 = nn.LayerNorm(dims[2])

        self.block4 = nn.ModuleList([
            TransformerBlock(dims[3], heads[3], reduction_ratios[3], token_mlp)
            for _ in range(layers[3])])
        self.norm4 = nn.LayerNorm(dims[3])

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        B = x.shape[0]
        outs = []

        # Each stage: embed -> transformer blocks -> norm -> back to a map.
        for i in (1, 2, 3, 4):
            x, H, W = getattr(self, f"patch_embed{i}")(x)
            for blk in getattr(self, f"block{i}"):
                x = blk(x, H, W)
            x = getattr(self, f"norm{i}")(x)
            x = x.reshape(B, H, W, -1).permute(0, 3, 1, 2).contiguous()
            outs.append(x)

        return outs
|
| 398 |
+
|
| 399 |
+
|
| 400 |
+
class FuseMiT(nn.Module):
    """MiT variant whose stages use FuseTransformerBlock (MixD_FFN fusion).

    Input is fixed at 3 channels; outputs are the 4-level feature pyramid.
    """

    def __init__(self, image_size, dims, layers, fuse_mode='add'):
        super().__init__()
        patch_sizes = [7, 3, 3, 3]
        strides = [4, 2, 2, 2]
        padding_sizes = [3, 1, 1, 1]
        reduction_ratios = [8, 4, 2, 1]
        heads = [1, 2, 5, 8]

        # Patch embeddings, one per stage.
        self.patch_embed1 = OverlapPatchEmbeddings(image_size, patch_sizes[0], strides[0], padding_sizes[0], 3, dims[0])
        self.patch_embed2 = OverlapPatchEmbeddings(image_size//4, patch_sizes[1], strides[1], padding_sizes[1], dims[0], dims[1])
        self.patch_embed3 = OverlapPatchEmbeddings(image_size//8, patch_sizes[2], strides[2], padding_sizes[2], dims[1], dims[2])
        self.patch_embed4 = OverlapPatchEmbeddings(image_size//16, patch_sizes[3], strides[3], padding_sizes[3], dims[2], dims[3])

        # Transformer stages, each followed by a LayerNorm.
        self.block1 = nn.ModuleList([
            FuseTransformerBlock(dims[0], heads[0], reduction_ratios[0], fuse_mode)
            for _ in range(layers[0])])
        self.norm1 = nn.LayerNorm(dims[0])

        self.block2 = nn.ModuleList([
            FuseTransformerBlock(dims[1], heads[1], reduction_ratios[1], fuse_mode)
            for _ in range(layers[1])])
        self.norm2 = nn.LayerNorm(dims[1])

        self.block3 = nn.ModuleList([
            FuseTransformerBlock(dims[2], heads[2], reduction_ratios[2], fuse_mode)
            for _ in range(layers[2])])
        self.norm3 = nn.LayerNorm(dims[2])

        self.block4 = nn.ModuleList([
            FuseTransformerBlock(dims[3], heads[3], reduction_ratios[3], fuse_mode)
            for _ in range(layers[3])])
        self.norm4 = nn.LayerNorm(dims[3])

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        B = x.shape[0]
        outs = []

        # Each stage: embed -> transformer blocks -> norm -> back to a map.
        for i in (1, 2, 3, 4):
            x, H, W = getattr(self, f"patch_embed{i}")(x)
            for blk in getattr(self, f"block{i}"):
                x = blk(x, H, W)
            x = getattr(self, f"norm{i}")(x)
            x = x.reshape(B, H, W, -1).permute(0, 3, 1, 2).contiguous()
            outs.append(x)

        return outs
|
| 476 |
+
|
| 477 |
+
|
| 478 |
+
|
| 479 |
+
|
| 480 |
+
class Decoder(nn.Module):
    """SegFormer all-MLP decoder: project each scale, upsample, fuse, predict."""

    def __init__(self, dims, embed_dim, num_classes):
        super().__init__()

        # Per-scale linear projections to a common embedding width.
        self.linear_c1 = MLP(dims[0], embed_dim)
        self.linear_c2 = MLP(dims[1], embed_dim)
        self.linear_c3 = MLP(dims[2], embed_dim)
        self.linear_c4 = MLP(dims[3], embed_dim)

        self.linear_fuse = ConvModule(embed_dim*4, embed_dim, 1)
        self.linear_pred = nn.Conv2d(embed_dim, num_classes, 1)

        # NOTE(review): conv_seg is never used in forward; kept only for
        # checkpoint/state_dict compatibility.
        self.conv_seg = nn.Conv2d(128, num_classes, 1)

        self.dropout = nn.Dropout2d(0.1)

    def forward(self, inputs: Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]) -> torch.Tensor:
        c1, c2, c3, c4 = inputs
        n = c1.shape[0]
        target_hw = c1.shape[2:]  # every scale is upsampled to the c1 grid

        # c1 is already at the target resolution.
        c1f = self.linear_c1(c1).permute(0, 2, 1).reshape(n, -1, c1.shape[2], c1.shape[3])

        rescaled = [c1f]
        for proj, c in ((self.linear_c2, c2), (self.linear_c3, c3), (self.linear_c4, c4)):
            f = proj(c).permute(0, 2, 1).reshape(n, -1, c.shape[2], c.shape[3])
            rescaled.append(F.interpolate(f, size=target_hw, mode='bilinear', align_corners=False))

        # Concatenate coarsest-first (c4, c3, c2, c1), as in the reference impl.
        fused = self.linear_fuse(torch.cat(rescaled[::-1], dim=1))
        return self.linear_pred(self.dropout(fused))
|
| 513 |
+
|
| 514 |
+
|
| 515 |
+
# Per-variant encoder hyper-parameters, keyed by SegFormer model name.
segformer_settings = {
    'B0': [[32, 64, 160, 256], [2, 2, 2, 2], 256],        # [channel dimensions, num encoder layers, embed dim]
    'B1': [[64, 128, 320, 512], [2, 2, 2, 2], 256],
    'B2': [[64, 128, 320, 512], [3, 4, 6, 3], 768],
    'B3': [[64, 128, 320, 512], [3, 4, 18, 3], 768],
    'B4': [[64, 128, 320, 512], [3, 8, 27, 3], 768],
    'B5': [[64, 128, 320, 512], [3, 6, 40, 3], 768]
}
|
| 523 |
+
|
| 524 |
+
|
| 525 |
+
class SegFormer(nn.Module):
    """MiT encoder + all-MLP decoder for semantic segmentation."""

    def __init__(self, model_name: str = 'B0', num_classes: int = 19, image_size: int = 224) -> None:
        super().__init__()
        assert model_name in segformer_settings.keys(), f"SegFormer model name should be in {list(segformer_settings.keys())}"
        dims, layers, embed_dim = segformer_settings[model_name]

        self.backbone = MiT(image_size, dims, layers)
        self.decode_head = Decoder(dims, embed_dim, num_classes)

    def init_weights(self, pretrained: str = None) -> None:
        """Load backbone weights from `pretrained`; otherwise Xavier-init all layers."""
        if pretrained:
            self.backbone.load_state_dict(torch.load(pretrained, map_location='cpu'), strict=False)
            return
        for m in self.modules():
            # Linear and Conv2d receive identical treatment, so the two
            # original branches are merged.
            if isinstance(m, (nn.Linear, nn.Conv2d)):
                nn.init.xavier_uniform_(m.weight)
                if m.bias is not None:
                    nn.init.zeros_(m.bias)
            elif isinstance(m, nn.LayerNorm):
                nn.init.ones_(m.weight)
                nn.init.zeros_(m.bias)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # Grayscale inputs are tiled to 3 channels for the RGB backbone.
        if x.size()[1] == 1:
            x = x.repeat(1, 3, 1, 1)
        return self.decode_head(self.backbone(x))
|
| 557 |
+
|
models/_resunet/__init__.py
ADDED
|
File without changes
|
models/_resunet/modules.py
ADDED
|
@@ -0,0 +1,143 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# https://github.com/rishikksh20/ResUnet/blob/master/core/modules.py
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
import torch.nn as nn
|
| 5 |
+
import torch
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
class ResidualConv(nn.Module):
    """Pre-activation residual block: (BN-ReLU-Conv)x2 plus a conv shortcut."""

    def __init__(self, input_dim, output_dim, stride, padding):
        super(ResidualConv, self).__init__()

        self.conv_block = nn.Sequential(
            nn.BatchNorm2d(input_dim),
            nn.ReLU(),
            nn.Conv2d(
                input_dim, output_dim, kernel_size=3, stride=stride, padding=padding
            ),
            nn.BatchNorm2d(output_dim),
            nn.ReLU(),
            nn.Conv2d(output_dim, output_dim, kernel_size=3, padding=1),
        )
        # Shortcut uses the same stride so both branches match spatially.
        self.conv_skip = nn.Sequential(
            nn.Conv2d(input_dim, output_dim, kernel_size=3, stride=stride, padding=1),
            nn.BatchNorm2d(output_dim),
        )

    def forward(self, x):
        shortcut = self.conv_skip(x)
        return self.conv_block(x) + shortcut
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
class Upsample(nn.Module):
    """Learned upsampling via a transposed convolution."""

    def __init__(self, input_dim, output_dim, kernel, stride):
        super(Upsample, self).__init__()
        self.upsample = nn.ConvTranspose2d(
            input_dim, output_dim, kernel_size=kernel, stride=stride
        )

    def forward(self, x):
        return self.upsample(x)
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
class Squeeze_Excite_Block(nn.Module):
    """Squeeze-and-Excitation: reweight channels by globally pooled statistics."""

    def __init__(self, channel, reduction=16):
        super(Squeeze_Excite_Block, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        # Bottleneck MLP (channel -> channel/reduction -> channel) + sigmoid gate.
        self.fc = nn.Sequential(
            nn.Linear(channel, channel // reduction, bias=False),
            nn.ReLU(inplace=True),
            nn.Linear(channel // reduction, channel, bias=False),
            nn.Sigmoid(),
        )

    def forward(self, x):
        batch, channels = x.size(0), x.size(1)
        squeezed = self.avg_pool(x).view(batch, channels)
        gate = self.fc(squeezed).view(batch, channels, 1, 1)
        return x * gate.expand_as(x)
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
class ASPP(nn.Module):
    """Atrous Spatial Pyramid Pooling: parallel dilated 3x3 convs fused by a 1x1."""

    def __init__(self, in_dims, out_dims, rate=[6, 12, 18]):
        super(ASPP, self).__init__()

        def branch(dilation):
            # padding == dilation keeps the spatial size for a 3x3 kernel.
            return nn.Sequential(
                nn.Conv2d(
                    in_dims, out_dims, 3, stride=1, padding=dilation, dilation=dilation
                ),
                nn.ReLU(inplace=True),
                nn.BatchNorm2d(out_dims),
            )

        self.aspp_block1 = branch(rate[0])
        self.aspp_block2 = branch(rate[1])
        self.aspp_block3 = branch(rate[2])

        self.output = nn.Conv2d(len(rate) * out_dims, out_dims, 1)
        self._init_weights()

    def forward(self, x):
        branches = [self.aspp_block1(x), self.aspp_block2(x), self.aspp_block3(x)]
        return self.output(torch.cat(branches, dim=1))

    def _init_weights(self):
        # Kaiming init for convs; weight=1 / bias=0 for BatchNorm layers.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight)
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
|
| 105 |
+
|
| 106 |
+
|
| 107 |
+
class Upsample_(nn.Module):
    """Parameter-free bilinear upsampling by a fixed scale factor."""

    def __init__(self, scale=2):
        super(Upsample_, self).__init__()
        self.upsample = nn.Upsample(mode="bilinear", scale_factor=scale)

    def forward(self, x):
        return self.upsample(x)
|
| 115 |
+
|
| 116 |
+
|
| 117 |
+
class AttentionBlock(nn.Module):
    """Additive attention gate: scores decoder features using encoder context."""

    def __init__(self, input_encoder, input_decoder, output_dim):
        super(AttentionBlock, self).__init__()

        # Encoder branch is max-pooled so it matches the decoder resolution.
        self.conv_encoder = nn.Sequential(
            nn.BatchNorm2d(input_encoder),
            nn.ReLU(),
            nn.Conv2d(input_encoder, output_dim, 3, padding=1),
            nn.MaxPool2d(2, 2),
        )

        self.conv_decoder = nn.Sequential(
            nn.BatchNorm2d(input_decoder),
            nn.ReLU(),
            nn.Conv2d(input_decoder, output_dim, 3, padding=1),
        )

        # Collapses the summed features to a single-channel attention map.
        self.conv_attn = nn.Sequential(
            nn.BatchNorm2d(output_dim),
            nn.ReLU(),
            nn.Conv2d(output_dim, 1, 1),
        )

    def forward(self, x1, x2):
        combined = self.conv_encoder(x1) + self.conv_decoder(x2)
        attn = self.conv_attn(combined)
        return attn * x2
|
models/_resunet/res_unet.py
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# https://github.com/rishikksh20/ResUnet
|
| 2 |
+
|
| 3 |
+
import torch
|
| 4 |
+
import torch.nn as nn
|
| 5 |
+
from .modules import ResidualConv, Upsample
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
class ResUnet(nn.Module):
    """Residual U-Net: 3-level residual encoder, bridge, 3-level decoder."""

    def __init__(self, in_ch, out_ch, filters=[64, 128, 256, 512]):
        super(ResUnet, self).__init__()

        # Stem: plain double conv plus a conv shortcut.
        self.input_layer = nn.Sequential(
            nn.Conv2d(in_ch, filters[0], kernel_size=3, padding=1),
            nn.BatchNorm2d(filters[0]),
            nn.ReLU(),
            nn.Conv2d(filters[0], filters[0], kernel_size=3, padding=1),
        )
        self.input_skip = nn.Sequential(
            nn.Conv2d(in_ch, filters[0], kernel_size=3, padding=1)
        )

        # Encoder: each residual conv halves the resolution (stride 2).
        self.residual_conv_1 = ResidualConv(filters[0], filters[1], 2, 1)
        self.residual_conv_2 = ResidualConv(filters[1], filters[2], 2, 1)

        self.bridge = ResidualConv(filters[2], filters[3], 2, 1)

        # Decoder: transpose-conv upsample, concat encoder skip, residual conv.
        self.upsample_1 = Upsample(filters[3], filters[3], 2, 2)
        self.up_residual_conv1 = ResidualConv(filters[3] + filters[2], filters[2], 1, 1)

        self.upsample_2 = Upsample(filters[2], filters[2], 2, 2)
        self.up_residual_conv2 = ResidualConv(filters[2] + filters[1], filters[1], 1, 1)

        self.upsample_3 = Upsample(filters[1], filters[1], 2, 2)
        self.up_residual_conv3 = ResidualConv(filters[1] + filters[0], filters[0], 1, 1)

        self.output_layer = nn.Sequential(
            nn.Conv2d(filters[0], out_ch, 1, 1),
        )

    def forward(self, x):
        # Encoder path.
        enc1 = self.input_layer(x) + self.input_skip(x)
        enc2 = self.residual_conv_1(enc1)
        enc3 = self.residual_conv_2(enc2)

        # Bridge.
        mid = self.bridge(enc3)

        # Decoder path with skip connections at matching resolutions.
        dec = self.up_residual_conv1(torch.cat([self.upsample_1(mid), enc3], dim=1))
        dec = self.up_residual_conv2(torch.cat([self.upsample_2(dec), enc2], dim=1))
        dec = self.up_residual_conv3(torch.cat([self.upsample_3(dec), enc1], dim=1))

        return self.output_layer(dec)
|
models/_transunet/vit_seg_configs.py
ADDED
|
@@ -0,0 +1,130 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import ml_collections
|
| 2 |
+
|
| 3 |
+
def get_b16_config():
    """Returns the ViT-B/16 configuration."""
    config = ml_collections.ConfigDict()
    config.patches = ml_collections.ConfigDict({'size': (16, 16)})
    config.hidden_size = 768
    # Transformer encoder hyper-parameters.
    config.transformer = ml_collections.ConfigDict({
        'mlp_dim': 3072,
        'num_heads': 12,
        'num_layers': 12,
        'attention_dropout_rate': 0.0,
        'dropout_rate': 0.1,
    })

    config.classifier = 'seg'
    config.representation_size = None
    config.resnet_pretrained_path = None
    config.pretrained_path = '../model/vit_checkpoint/imagenet21k/ViT-B_16.npz'
    config.patch_size = 16

    # Segmentation decoder settings.
    config.decoder_channels = (256, 128, 64, 16)
    config.n_classes = 2
    config.activation = 'softmax'
    return config
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
def get_testing():
    """Returns a minimal configuration for testing."""
    config = ml_collections.ConfigDict()
    config.patches = ml_collections.ConfigDict({'size': (16, 16)})
    config.hidden_size = 1
    # Tiny transformer: one layer, one head, unit widths.
    config.transformer = ml_collections.ConfigDict({
        'mlp_dim': 1,
        'num_heads': 1,
        'num_layers': 1,
        'attention_dropout_rate': 0.0,
        'dropout_rate': 0.1,
    })
    config.classifier = 'token'
    config.representation_size = None
    return config
|
| 41 |
+
|
| 42 |
+
def get_r50_b16_config():
    """Returns the Resnet50 + ViT-B/16 configuration."""
    config = get_b16_config()
    config.patches.grid = (16, 16)
    # Hybrid ResNet stem feeding the transformer.
    config.resnet = ml_collections.ConfigDict({
        'num_layers': (3, 4, 9),
        'width_factor': 1,
    })

    config.classifier = 'seg'
    config.pretrained_path = '../model/vit_checkpoint/imagenet21k/R50+ViT-B_16.npz'
    # Decoder / skip-connection settings for the segmentation head.
    config.decoder_channels = (256, 128, 64, 16)
    config.skip_channels = [512, 256, 64, 16]
    config.n_classes = 2
    config.n_skip = 3
    config.activation = 'softmax'

    return config
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
def get_b32_config():
    """Returns the ViT-B/32 configuration (B/16 with 32x32 patches)."""
    config = get_b16_config()
    config.patches.size = (32, 32)
    config.pretrained_path = '../model/vit_checkpoint/imagenet21k/ViT-B_32.npz'
    return config
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
def get_l16_config():
    """Returns the ViT-L/16 configuration."""
    config = ml_collections.ConfigDict()
    config.patches = ml_collections.ConfigDict({'size': (16, 16)})
    config.hidden_size = 1024
    # Transformer encoder hyper-parameters.
    config.transformer = ml_collections.ConfigDict({
        'mlp_dim': 4096,
        'num_heads': 16,
        'num_layers': 24,
        'attention_dropout_rate': 0.0,
        'dropout_rate': 0.1,
    })
    config.representation_size = None

    # custom
    config.classifier = 'seg'
    config.resnet_pretrained_path = None
    config.pretrained_path = '../model/vit_checkpoint/imagenet21k/ViT-L_16.npz'
    config.decoder_channels = (256, 128, 64, 16)
    config.n_classes = 2
    config.activation = 'softmax'
    return config
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
def get_r50_l16_config():
    """Returns the Resnet50 + ViT-L/16 configuration. customized """
    config = get_l16_config()
    config.patches.grid = (16, 16)
    # Hybrid ResNet stem feeding the transformer.
    config.resnet = ml_collections.ConfigDict({
        'num_layers': (3, 4, 9),
        'width_factor': 1,
    })

    config.classifier = 'seg'
    config.resnet_pretrained_path = '../model/vit_checkpoint/imagenet21k/R50+ViT-B_16.npz'
    # Decoder / skip-connection settings for the segmentation head.
    config.decoder_channels = (256, 128, 64, 16)
    config.skip_channels = [512, 256, 64, 16]
    config.n_classes = 2
    config.activation = 'softmax'
    return config
|
| 107 |
+
|
| 108 |
+
|
| 109 |
+
def get_l32_config():
    """Returns the ViT-L/32 configuration (L/16 with 32x32 patches)."""
    config = get_l16_config()
    config.patches.size = (32, 32)
    return config
|
| 114 |
+
|
| 115 |
+
|
| 116 |
+
def get_h14_config():
    """Returns the ViT-H/14 configuration."""
    # NOTE: the original docstring said "ViT-L/16", but the values below
    # (14x14 patches, 1280 hidden, 32 layers) are the ViT-Huge/14 settings.
    config = ml_collections.ConfigDict()
    config.patches = ml_collections.ConfigDict({'size': (14, 14)})
    config.hidden_size = 1280
    config.transformer = ml_collections.ConfigDict()
    config.transformer.mlp_dim = 5120
    config.transformer.num_heads = 16
    config.transformer.num_layers = 32
    config.transformer.attention_dropout_rate = 0.0
    config.transformer.dropout_rate = 0.1
    config.classifier = 'token'
    config.representation_size = None

    return config
|
models/_transunet/vit_seg_modeling.py
ADDED
|
@@ -0,0 +1,453 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# coding=utf-8
|
| 2 |
+
from __future__ import absolute_import
|
| 3 |
+
from __future__ import division
|
| 4 |
+
from __future__ import print_function
|
| 5 |
+
|
| 6 |
+
import copy
|
| 7 |
+
import logging
|
| 8 |
+
import math
|
| 9 |
+
|
| 10 |
+
from os.path import join as pjoin
|
| 11 |
+
|
| 12 |
+
import torch
|
| 13 |
+
import torch.nn as nn
|
| 14 |
+
import numpy as np
|
| 15 |
+
|
| 16 |
+
from torch.nn import CrossEntropyLoss, Dropout, Softmax, Linear, Conv2d, LayerNorm
|
| 17 |
+
from torch.nn.modules.utils import _pair
|
| 18 |
+
from scipy import ndimage
|
| 19 |
+
from . import vit_seg_configs as configs
|
| 20 |
+
from .vit_seg_modeling_resnet_skip import ResNetV2
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
logger = logging.getLogger(__name__)
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
ATTENTION_Q = "MultiHeadDotProductAttention_1/query"
|
| 27 |
+
ATTENTION_K = "MultiHeadDotProductAttention_1/key"
|
| 28 |
+
ATTENTION_V = "MultiHeadDotProductAttention_1/value"
|
| 29 |
+
ATTENTION_OUT = "MultiHeadDotProductAttention_1/out"
|
| 30 |
+
FC_0 = "MlpBlock_3/Dense_0"
|
| 31 |
+
FC_1 = "MlpBlock_3/Dense_1"
|
| 32 |
+
ATTENTION_NORM = "LayerNorm_0"
|
| 33 |
+
MLP_NORM = "LayerNorm_2"
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
def np2th(weights, conv=False):
    """Convert a numpy weight array to a torch tensor.

    When ``conv=True`` the kernel layout is remapped from JAX/Flax
    HWIO to PyTorch OIHW before conversion.
    """
    arr = weights.transpose([3, 2, 0, 1]) if conv else weights
    return torch.from_numpy(arr)
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
def swish(x):
    """Swish / SiLU activation: ``x * sigmoid(x)``."""
    return torch.sigmoid(x).mul(x)


# Activation registry keyed by the names used in the ViT configs.
ACT2FN = {"gelu": torch.nn.functional.gelu, "relu": torch.nn.functional.relu, "swish": swish}
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
class Attention(nn.Module):
    """Multi-head self-attention as used in the ViT encoder."""

    def __init__(self, config, vis):
        super(Attention, self).__init__()
        self.vis = vis  # when True, forward() also returns the attention maps
        self.num_attention_heads = config.transformer["num_heads"]
        self.attention_head_size = int(config.hidden_size / self.num_attention_heads)
        self.all_head_size = self.num_attention_heads * self.attention_head_size

        self.query = Linear(config.hidden_size, self.all_head_size)
        self.key = Linear(config.hidden_size, self.all_head_size)
        self.value = Linear(config.hidden_size, self.all_head_size)

        self.out = Linear(config.hidden_size, config.hidden_size)
        self.attn_dropout = Dropout(config.transformer["attention_dropout_rate"])
        self.proj_dropout = Dropout(config.transformer["attention_dropout_rate"])

        self.softmax = Softmax(dim=-1)

    def transpose_for_scores(self, x):
        """Reshape (B, N, all_head) -> (B, heads, N, head_size)."""
        target_shape = x.size()[:-1] + (self.num_attention_heads, self.attention_head_size)
        return x.view(*target_shape).permute(0, 2, 1, 3)

    def forward(self, hidden_states):
        """Return ``(attention_output, weights)``; ``weights`` is None unless ``vis``."""
        q = self.transpose_for_scores(self.query(hidden_states))
        k = self.transpose_for_scores(self.key(hidden_states))
        v = self.transpose_for_scores(self.value(hidden_states))

        # Scaled dot-product attention over the token axis.
        scores = torch.matmul(q, k.transpose(-1, -2)) / math.sqrt(self.attention_head_size)
        probs = self.softmax(scores)
        weights = probs if self.vis else None
        probs = self.attn_dropout(probs)

        context = torch.matmul(probs, v).permute(0, 2, 1, 3).contiguous()
        context = context.view(*(context.size()[:-2] + (self.all_head_size,)))
        output = self.proj_dropout(self.out(context))
        return output, weights
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
class Mlp(nn.Module):
    """Transformer feed-forward block: Linear -> GELU -> dropout -> Linear -> dropout."""

    def __init__(self, config):
        super(Mlp, self).__init__()
        self.fc1 = Linear(config.hidden_size, config.transformer["mlp_dim"])
        self.fc2 = Linear(config.transformer["mlp_dim"], config.hidden_size)
        self.act_fn = ACT2FN["gelu"]
        self.dropout = Dropout(config.transformer["dropout_rate"])

        self._init_weights()

    def _init_weights(self):
        # Xavier weights, near-zero biases — matches the reference ViT init.
        # Order is kept identical to the original for RNG reproducibility.
        nn.init.xavier_uniform_(self.fc1.weight)
        nn.init.xavier_uniform_(self.fc2.weight)
        nn.init.normal_(self.fc1.bias, std=1e-6)
        nn.init.normal_(self.fc2.bias, std=1e-6)

    def forward(self, x):
        hidden = self.dropout(self.act_fn(self.fc1(x)))
        return self.dropout(self.fc2(hidden))
|
| 120 |
+
|
| 121 |
+
|
| 122 |
+
class Embeddings(nn.Module):
    """Patch + position embeddings, optionally on top of a hybrid ResNet stem."""

    def __init__(self, config, img_size, in_channels=3):
        super(Embeddings, self).__init__()
        self.hybrid = None
        self.config = config
        img_size = _pair(img_size)

        if config.patches.get("grid") is not None:
            # Hybrid ViT: patches are cut from the 1/16-resolution ResNet feature map.
            grid_size = config.patches["grid"]
            patch_size = (img_size[0] // 16 // grid_size[0], img_size[1] // 16 // grid_size[1])
            patch_size_real = (patch_size[0] * 16, patch_size[1] * 16)
            n_patches = (img_size[0] // patch_size_real[0]) * (img_size[1] // patch_size_real[1])
            self.hybrid = True
        else:
            # Pure ViT: patches are cut directly from the input image.
            patch_size = _pair(config.patches["size"])
            n_patches = (img_size[0] // patch_size[0]) * (img_size[1] // patch_size[1])
            self.hybrid = False

        if self.hybrid:
            self.hybrid_model = ResNetV2(block_units=config.resnet.num_layers,
                                         width_factor=config.resnet.width_factor)
            in_channels = self.hybrid_model.width * 16
        # A strided conv is equivalent to linear projection of flattened patches.
        self.patch_embeddings = Conv2d(in_channels=in_channels,
                                       out_channels=config.hidden_size,
                                       kernel_size=patch_size,
                                       stride=patch_size)
        self.position_embeddings = nn.Parameter(torch.zeros(1, n_patches, config.hidden_size))

        self.dropout = Dropout(config.transformer["dropout_rate"])

    def forward(self, x):
        """Return ``(embeddings, skip_features)``; features are None without the hybrid stem."""
        if self.hybrid:
            x, features = self.hybrid_model(x)
        else:
            features = None
        patches = self.patch_embeddings(x)             # (B, hidden, H/ps, W/ps)
        tokens = patches.flatten(2).transpose(-1, -2)  # (B, n_patches, hidden)
        return self.dropout(tokens + self.position_embeddings), features
|
| 166 |
+
|
| 167 |
+
|
| 168 |
+
class Block(nn.Module):
    """One transformer encoder block: pre-norm MHSA and pre-norm MLP, both residual."""

    def __init__(self, config, vis):
        super(Block, self).__init__()
        self.hidden_size = config.hidden_size
        self.attention_norm = LayerNorm(config.hidden_size, eps=1e-6)
        self.ffn_norm = LayerNorm(config.hidden_size, eps=1e-6)
        self.ffn = Mlp(config)
        self.attn = Attention(config, vis)

    def forward(self, x):
        # Attention sub-layer with residual connection.
        residual = x
        x, weights = self.attn(self.attention_norm(x))
        x = x + residual

        # Feed-forward sub-layer with residual connection.
        residual = x
        x = self.ffn(self.ffn_norm(x)) + residual
        return x, weights

    def load_from(self, weights, n_block):
        """Copy pretrained JAX/Flax weights for encoder block ``n_block`` into this module."""
        ROOT = f"Transformer/encoderblock_{n_block}"
        with torch.no_grad():
            hs = self.hidden_size
            # Attention projections: Flax stores kernels as (in, out); transpose for torch.
            attn_parts = ((self.attn.query, ATTENTION_Q),
                          (self.attn.key, ATTENTION_K),
                          (self.attn.value, ATTENTION_V),
                          (self.attn.out, ATTENTION_OUT))
            for module, prefix in attn_parts:
                module.weight.copy_(np2th(weights[pjoin(ROOT, prefix, "kernel")]).view(hs, hs).t())
                module.bias.copy_(np2th(weights[pjoin(ROOT, prefix, "bias")]).view(-1))

            # MLP dense layers (bias .t() is a no-op on 1-D tensors, kept from upstream).
            for module, prefix in ((self.ffn.fc1, FC_0), (self.ffn.fc2, FC_1)):
                module.weight.copy_(np2th(weights[pjoin(ROOT, prefix, "kernel")]).t())
                module.bias.copy_(np2th(weights[pjoin(ROOT, prefix, "bias")]).t())

            # Layer norms ("scale" in Flax maps to the torch weight).
            for module, prefix in ((self.attention_norm, ATTENTION_NORM),
                                   (self.ffn_norm, MLP_NORM)):
                module.weight.copy_(np2th(weights[pjoin(ROOT, prefix, "scale")]))
                module.bias.copy_(np2th(weights[pjoin(ROOT, prefix, "bias")]))
|
| 225 |
+
|
| 226 |
+
|
| 227 |
+
class Encoder(nn.Module):
    """Stack of transformer blocks followed by a final layer norm."""

    def __init__(self, config, vis):
        super(Encoder, self).__init__()
        self.vis = vis
        self.layer = nn.ModuleList()
        self.encoder_norm = LayerNorm(config.hidden_size, eps=1e-6)
        for _ in range(config.transformer["num_layers"]):
            # deepcopy of a freshly built block, kept from upstream for parity.
            self.layer.append(copy.deepcopy(Block(config, vis)))

    def forward(self, hidden_states):
        """Return ``(encoded, attn_weights)``; weights are collected only when vis."""
        attn_weights = []
        for block in self.layer:
            hidden_states, weights = block(hidden_states)
            if self.vis:
                attn_weights.append(weights)
        return self.encoder_norm(hidden_states), attn_weights
|
| 245 |
+
|
| 246 |
+
|
| 247 |
+
class Transformer(nn.Module):
    """ViT backbone: patch/position embeddings followed by the transformer encoder."""

    def __init__(self, config, img_size, vis):
        super(Transformer, self).__init__()
        self.embeddings = Embeddings(config, img_size=img_size)
        self.encoder = Encoder(config, vis)

    def forward(self, input_ids):
        tokens, skip_features = self.embeddings(input_ids)
        encoded, attn_weights = self.encoder(tokens)  # (B, n_patch, hidden)
        return encoded, attn_weights, skip_features
|
| 257 |
+
|
| 258 |
+
|
| 259 |
+
class Conv2dReLU(nn.Sequential):
    """Conv2d -> BatchNorm2d -> ReLU.

    NOTE(review): BatchNorm is applied even when ``use_batchnorm=False`` —
    the flag only toggles the conv bias. This matches upstream TransUNet
    and is preserved as-is.
    """

    def __init__(self, in_channels, out_channels, kernel_size,
                 padding=0, stride=1, use_batchnorm=True):
        layers = (
            nn.Conv2d(in_channels, out_channels, kernel_size,
                      stride=stride, padding=padding,
                      bias=not use_batchnorm),  # BN makes the conv bias redundant
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
        )
        super(Conv2dReLU, self).__init__(*layers)
|
| 282 |
+
|
| 283 |
+
|
| 284 |
+
class DecoderBlock(nn.Module):
    """Decoder stage: 2x bilinear upsample, optional skip concat, two 3x3 conv-BN-ReLU."""

    def __init__(self, in_channels, out_channels, skip_channels=0, use_batchnorm=True):
        super().__init__()
        self.conv1 = Conv2dReLU(in_channels + skip_channels, out_channels,
                                kernel_size=3, padding=1, use_batchnorm=use_batchnorm)
        self.conv2 = Conv2dReLU(out_channels, out_channels,
                                kernel_size=3, padding=1, use_batchnorm=use_batchnorm)
        self.up = nn.UpsamplingBilinear2d(scale_factor=2)

    def forward(self, x, skip=None):
        """Upsample ``x``; concatenate ``skip`` along channels when provided."""
        x = self.up(x)
        if skip is not None:
            x = torch.cat([x, skip], dim=1)
        return self.conv2(self.conv1(x))
|
| 316 |
+
|
| 317 |
+
|
| 318 |
+
class SegmentationHead(nn.Sequential):
    """Final conv head mapping decoder features to per-class logits, with optional upsampling."""

    def __init__(self, in_channels, out_channels, kernel_size=3, upsampling=1):
        head = nn.Conv2d(in_channels, out_channels,
                         kernel_size=kernel_size, padding=kernel_size // 2)
        scale = nn.UpsamplingBilinear2d(scale_factor=upsampling) if upsampling > 1 else nn.Identity()
        super().__init__(head, scale)
|
| 324 |
+
|
| 325 |
+
|
| 326 |
+
class DecoderCup(nn.Module):
    """TransUNet decoder: reshape transformer tokens to a 2-D map, then cascaded upsampling.

    Fix vs. upstream: the skip-channel pruning below used to zero entries of
    ``config.skip_channels`` *in place*, silently corrupting a config object
    shared across model instantiations. We now work on a copy.
    """

    def __init__(self, config):
        super().__init__()
        self.config = config
        head_channels = 512
        # Projects the transformer hidden dim down to the decoder's head width.
        self.conv_more = Conv2dReLU(
            config.hidden_size,
            head_channels,
            kernel_size=3,
            padding=1,
            use_batchnorm=True,
        )
        decoder_channels = config.decoder_channels
        in_channels = [head_channels] + list(decoder_channels[:-1])
        out_channels = decoder_channels

        if self.config.n_skip != 0:
            # Copy before zeroing so the shared config is never mutated.
            skip_channels = list(self.config.skip_channels)
            for i in range(4 - self.config.n_skip):  # drop the deepest unused skips
                skip_channels[3 - i] = 0
        else:
            skip_channels = [0, 0, 0, 0]

        self.blocks = nn.ModuleList(
            DecoderBlock(in_ch, out_ch, sk_ch)
            for in_ch, out_ch, sk_ch in zip(in_channels, out_channels, skip_channels)
        )

    def forward(self, hidden_states, features=None):
        """``hidden_states``: (B, n_patch, hidden); n_patch must be a perfect square."""
        B, n_patch, hidden = hidden_states.size()
        h, w = int(np.sqrt(n_patch)), int(np.sqrt(n_patch))
        x = hidden_states.permute(0, 2, 1).contiguous().view(B, hidden, h, w)
        x = self.conv_more(x)
        for i, decoder_block in enumerate(self.blocks):
            # Only the first n_skip stages receive encoder skip features.
            skip = features[i] if (features is not None and i < self.config.n_skip) else None
            x = decoder_block(x, skip=skip)
        return x
|
| 368 |
+
|
| 369 |
+
|
| 370 |
+
class VisionTransformer(nn.Module):
    """TransUNet: (hybrid) ViT encoder + cascaded CNN decoder + segmentation head."""

    def __init__(self, config, img_size=224, num_classes=21843, zero_head=False, vis=False):
        super(VisionTransformer, self).__init__()
        self.num_classes = num_classes
        self.zero_head = zero_head
        self.classifier = config.classifier
        self.transformer = Transformer(config, img_size, vis)
        self.decoder = DecoderCup(config)
        self.segmentation_head = SegmentationHead(
            in_channels=config['decoder_channels'][-1],
            out_channels=config['n_classes'],
            kernel_size=3,
        )
        self.config = config

    def forward(self, x):
        # Grayscale inputs are tiled to 3 channels to match the pretrained stem.
        if x.size()[1] == 1:
            x = x.repeat(1, 3, 1, 1)
        encoded, attn_weights, features = self.transformer(x)  # (B, n_patch, hidden)
        decoded = self.decoder(encoded, features)
        return self.segmentation_head(decoded)

    def load_from(self, weights):
        """Load pretrained JAX/Flax ViT (and hybrid ResNet) weights into this model."""
        with torch.no_grad():
            res_weight = weights
            embeddings = self.transformer.embeddings
            embeddings.patch_embeddings.weight.copy_(np2th(weights["embedding/kernel"], conv=True))
            embeddings.patch_embeddings.bias.copy_(np2th(weights["embedding/bias"]))

            self.transformer.encoder.encoder_norm.weight.copy_(np2th(weights["Transformer/encoder_norm/scale"]))
            self.transformer.encoder.encoder_norm.bias.copy_(np2th(weights["Transformer/encoder_norm/bias"]))

            posemb = np2th(weights["Transformer/posembed_input/pos_embedding"])
            posemb_new = embeddings.position_embeddings
            if posemb.size() == posemb_new.size():
                embeddings.position_embeddings.copy_(posemb)
            elif posemb.size()[1] - 1 == posemb_new.size()[1]:
                # Pretrained table includes a class token; segmentation uses none.
                posemb = posemb[:, 1:]
                embeddings.position_embeddings.copy_(posemb)
            else:
                # Grid sizes differ: bilinearly resize the 2-D position grid.
                logger.info("load_pretrained: resized variant: %s to %s" % (posemb.size(), posemb_new.size()))
                ntok_new = posemb_new.size(1)
                if self.classifier == "seg":
                    _, posemb_grid = posemb[:, :1], posemb[0, 1:]
                gs_old = int(np.sqrt(len(posemb_grid)))
                gs_new = int(np.sqrt(ntok_new))
                print('load_pretrained: grid-size from %s to %s' % (gs_old, gs_new))
                posemb_grid = posemb_grid.reshape(gs_old, gs_old, -1)
                zoom = (gs_new / gs_old, gs_new / gs_old, 1)
                posemb_grid = ndimage.zoom(posemb_grid, zoom, order=1)  # th2np
                posemb_grid = posemb_grid.reshape(1, gs_new * gs_new, -1)
                posemb = posemb_grid
                embeddings.position_embeddings.copy_(np2th(posemb))

            # Encoder blocks: named_children over the ModuleList yields index names,
            # which double as the pretrained block identifiers.
            for bname, block in self.transformer.encoder.named_children():
                for uname, unit in block.named_children():
                    unit.load_from(weights, n_block=uname)

            if embeddings.hybrid:
                root = embeddings.hybrid_model.root
                root.conv.weight.copy_(np2th(res_weight["conv_root/kernel"], conv=True))
                root.gn.weight.copy_(np2th(res_weight["gn_root/scale"]).view(-1))
                root.gn.bias.copy_(np2th(res_weight["gn_root/bias"]).view(-1))

                for bname, block in embeddings.hybrid_model.body.named_children():
                    for uname, unit in block.named_children():
                        unit.load_from(res_weight, n_block=bname, n_unit=uname)
|
| 441 |
+
|
| 442 |
+
# Named ViT configurations; the R50 variants prepend a hybrid ResNet-50 stem.
CONFIGS = {
    'ViT-B_16': configs.get_b16_config(),          # Base, 16x16 patches
    'ViT-B_32': configs.get_b32_config(),          # Base, 32x32 patches
    'ViT-L_16': configs.get_l16_config(),          # Large, 16x16 patches
    'ViT-L_32': configs.get_l32_config(),          # Large, 32x32 patches
    'ViT-H_14': configs.get_h14_config(),          # Huge, 14x14 patches
    'R50-ViT-B_16': configs.get_r50_b16_config(),  # hybrid ResNet-50 + ViT-B/16
    'R50-ViT-L_16': configs.get_r50_l16_config(),  # hybrid ResNet-50 + ViT-L/16
    'testing': configs.get_testing(),              # tiny config for unit tests
}
|
| 452 |
+
|
| 453 |
+
|
models/_transunet/vit_seg_modeling_c4.py
ADDED
|
@@ -0,0 +1,453 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# coding=utf-8
|
| 2 |
+
from __future__ import absolute_import
|
| 3 |
+
from __future__ import division
|
| 4 |
+
from __future__ import print_function
|
| 5 |
+
|
| 6 |
+
import copy
|
| 7 |
+
import logging
|
| 8 |
+
import math
|
| 9 |
+
|
| 10 |
+
from os.path import join as pjoin
|
| 11 |
+
|
| 12 |
+
import torch
|
| 13 |
+
import torch.nn as nn
|
| 14 |
+
import numpy as np
|
| 15 |
+
|
| 16 |
+
from torch.nn import CrossEntropyLoss, Dropout, Softmax, Linear, Conv2d, LayerNorm
|
| 17 |
+
from torch.nn.modules.utils import _pair
|
| 18 |
+
from scipy import ndimage
|
| 19 |
+
from . import vit_seg_configs as configs
|
| 20 |
+
from .vit_seg_modeling_resnet_skip_c4 import ResNetV2
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
logger = logging.getLogger(__name__)
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
ATTENTION_Q = "MultiHeadDotProductAttention_1/query"
|
| 27 |
+
ATTENTION_K = "MultiHeadDotProductAttention_1/key"
|
| 28 |
+
ATTENTION_V = "MultiHeadDotProductAttention_1/value"
|
| 29 |
+
ATTENTION_OUT = "MultiHeadDotProductAttention_1/out"
|
| 30 |
+
FC_0 = "MlpBlock_3/Dense_0"
|
| 31 |
+
FC_1 = "MlpBlock_3/Dense_1"
|
| 32 |
+
ATTENTION_NORM = "LayerNorm_0"
|
| 33 |
+
MLP_NORM = "LayerNorm_2"
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
def np2th(weights, conv=False):
    """Convert a numpy weight array to a torch tensor.

    When ``conv=True`` the kernel layout is remapped from JAX/Flax
    HWIO to PyTorch OIHW before conversion.
    """
    arr = weights.transpose([3, 2, 0, 1]) if conv else weights
    return torch.from_numpy(arr)
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
def swish(x):
    """Swish / SiLU activation: ``x * sigmoid(x)``."""
    return torch.sigmoid(x).mul(x)


# Activation registry keyed by the names used in the ViT configs.
ACT2FN = {"gelu": torch.nn.functional.gelu, "relu": torch.nn.functional.relu, "swish": swish}
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
class Attention(nn.Module):
    """Multi-head self-attention as used in the ViT encoder (4-channel variant file)."""

    def __init__(self, config, vis):
        super(Attention, self).__init__()
        self.vis = vis  # when True, forward() also returns the attention maps
        self.num_attention_heads = config.transformer["num_heads"]
        self.attention_head_size = int(config.hidden_size / self.num_attention_heads)
        self.all_head_size = self.num_attention_heads * self.attention_head_size

        self.query = Linear(config.hidden_size, self.all_head_size)
        self.key = Linear(config.hidden_size, self.all_head_size)
        self.value = Linear(config.hidden_size, self.all_head_size)

        self.out = Linear(config.hidden_size, config.hidden_size)
        self.attn_dropout = Dropout(config.transformer["attention_dropout_rate"])
        self.proj_dropout = Dropout(config.transformer["attention_dropout_rate"])

        self.softmax = Softmax(dim=-1)

    def transpose_for_scores(self, x):
        """Reshape (B, N, all_head) -> (B, heads, N, head_size)."""
        target_shape = x.size()[:-1] + (self.num_attention_heads, self.attention_head_size)
        return x.view(*target_shape).permute(0, 2, 1, 3)

    def forward(self, hidden_states):
        """Return ``(attention_output, weights)``; ``weights`` is None unless ``vis``."""
        q = self.transpose_for_scores(self.query(hidden_states))
        k = self.transpose_for_scores(self.key(hidden_states))
        v = self.transpose_for_scores(self.value(hidden_states))

        # Scaled dot-product attention over the token axis.
        scores = torch.matmul(q, k.transpose(-1, -2)) / math.sqrt(self.attention_head_size)
        probs = self.softmax(scores)
        weights = probs if self.vis else None
        probs = self.attn_dropout(probs)

        context = torch.matmul(probs, v).permute(0, 2, 1, 3).contiguous()
        context = context.view(*(context.size()[:-2] + (self.all_head_size,)))
        output = self.proj_dropout(self.out(context))
        return output, weights
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
class Mlp(nn.Module):
    """Transformer feed-forward block: Linear -> GELU -> dropout -> Linear -> dropout."""

    def __init__(self, config):
        super(Mlp, self).__init__()
        self.fc1 = Linear(config.hidden_size, config.transformer["mlp_dim"])
        self.fc2 = Linear(config.transformer["mlp_dim"], config.hidden_size)
        self.act_fn = ACT2FN["gelu"]
        self.dropout = Dropout(config.transformer["dropout_rate"])

        self._init_weights()

    def _init_weights(self):
        # Xavier weights, near-zero biases — matches the reference ViT init.
        # Order is kept identical to the original for RNG reproducibility.
        nn.init.xavier_uniform_(self.fc1.weight)
        nn.init.xavier_uniform_(self.fc2.weight)
        nn.init.normal_(self.fc1.bias, std=1e-6)
        nn.init.normal_(self.fc2.bias, std=1e-6)

    def forward(self, x):
        hidden = self.dropout(self.act_fn(self.fc1(x)))
        return self.dropout(self.fc2(hidden))
|
| 120 |
+
|
| 121 |
+
|
| 122 |
+
class Embeddings(nn.Module):
    """Patch + position embeddings; this c4 variant defaults to 4 input channels."""

    def __init__(self, config, img_size, in_channels=4):
        super(Embeddings, self).__init__()
        self.hybrid = None
        self.config = config
        img_size = _pair(img_size)

        if config.patches.get("grid") is not None:
            # Hybrid ViT: patches are cut from the 1/16-resolution ResNet feature map.
            grid_size = config.patches["grid"]
            patch_size = (img_size[0] // 16 // grid_size[0], img_size[1] // 16 // grid_size[1])
            patch_size_real = (patch_size[0] * 16, patch_size[1] * 16)
            n_patches = (img_size[0] // patch_size_real[0]) * (img_size[1] // patch_size_real[1])
            self.hybrid = True
        else:
            # Pure ViT: patches are cut directly from the input image.
            patch_size = _pair(config.patches["size"])
            n_patches = (img_size[0] // patch_size[0]) * (img_size[1] // patch_size[1])
            self.hybrid = False

        if self.hybrid:
            self.hybrid_model = ResNetV2(block_units=config.resnet.num_layers,
                                         width_factor=config.resnet.width_factor)
            in_channels = self.hybrid_model.width * 16
        # A strided conv is equivalent to linear projection of flattened patches.
        self.patch_embeddings = Conv2d(in_channels=in_channels,
                                       out_channels=config.hidden_size,
                                       kernel_size=patch_size,
                                       stride=patch_size)
        self.position_embeddings = nn.Parameter(torch.zeros(1, n_patches, config.hidden_size))

        self.dropout = Dropout(config.transformer["dropout_rate"])

    def forward(self, x):
        """Return ``(embeddings, skip_features)``; features are None without the hybrid stem."""
        if self.hybrid:
            x, features = self.hybrid_model(x)
        else:
            features = None
        patches = self.patch_embeddings(x)             # (B, hidden, H/ps, W/ps)
        tokens = patches.flatten(2).transpose(-1, -2)  # (B, n_patches, hidden)
        return self.dropout(tokens + self.position_embeddings), features
|
| 166 |
+
|
| 167 |
+
|
| 168 |
+
class Block(nn.Module):
    """One transformer encoder block: pre-norm MHSA and pre-norm MLP, both residual."""

    def __init__(self, config, vis):
        super(Block, self).__init__()
        self.hidden_size = config.hidden_size
        self.attention_norm = LayerNorm(config.hidden_size, eps=1e-6)
        self.ffn_norm = LayerNorm(config.hidden_size, eps=1e-6)
        self.ffn = Mlp(config)
        self.attn = Attention(config, vis)

    def forward(self, x):
        # Attention sub-layer with residual connection.
        residual = x
        x, weights = self.attn(self.attention_norm(x))
        x = x + residual

        # Feed-forward sub-layer with residual connection.
        residual = x
        x = self.ffn(self.ffn_norm(x)) + residual
        return x, weights

    def load_from(self, weights, n_block):
        """Copy pretrained JAX/Flax weights for encoder block ``n_block`` into this module."""
        ROOT = f"Transformer/encoderblock_{n_block}"
        with torch.no_grad():
            hs = self.hidden_size
            # Attention projections: Flax stores kernels as (in, out); transpose for torch.
            attn_parts = ((self.attn.query, ATTENTION_Q),
                          (self.attn.key, ATTENTION_K),
                          (self.attn.value, ATTENTION_V),
                          (self.attn.out, ATTENTION_OUT))
            for module, prefix in attn_parts:
                module.weight.copy_(np2th(weights[pjoin(ROOT, prefix, "kernel")]).view(hs, hs).t())
                module.bias.copy_(np2th(weights[pjoin(ROOT, prefix, "bias")]).view(-1))

            # MLP dense layers (bias .t() is a no-op on 1-D tensors, kept from upstream).
            for module, prefix in ((self.ffn.fc1, FC_0), (self.ffn.fc2, FC_1)):
                module.weight.copy_(np2th(weights[pjoin(ROOT, prefix, "kernel")]).t())
                module.bias.copy_(np2th(weights[pjoin(ROOT, prefix, "bias")]).t())

            # Layer norms ("scale" in Flax maps to the torch weight).
            for module, prefix in ((self.attention_norm, ATTENTION_NORM),
                                   (self.ffn_norm, MLP_NORM)):
                module.weight.copy_(np2th(weights[pjoin(ROOT, prefix, "scale")]))
                module.bias.copy_(np2th(weights[pjoin(ROOT, prefix, "bias")]))
|
| 225 |
+
|
| 226 |
+
|
| 227 |
+
class Encoder(nn.Module):
    """Stack of transformer Blocks followed by a final LayerNorm.

    Args:
        config: model config; ``config.transformer["num_layers"]`` gives the
            number of Blocks and ``config.hidden_size`` the embedding width.
        vis: when True, each Block's attention map is collected and returned.
    """

    def __init__(self, config, vis):
        super(Encoder, self).__init__()
        self.vis = vis
        self.layer = nn.ModuleList()
        self.encoder_norm = LayerNorm(config.hidden_size, eps=1e-6)
        # Fix: the original deep-copied each Block right after constructing
        # it (copy.deepcopy of a brand-new module) — redundant work with
        # identical resulting structure, so the copy is dropped.
        for _ in range(config.transformer["num_layers"]):
            self.layer.append(Block(config, vis))

    def forward(self, hidden_states):
        """Run all Blocks in order.

        Returns:
            (encoded, attn_weights): final LayerNorm-ed hidden states of the
            same shape as the input, and the list of per-layer attention
            maps (empty unless ``vis`` is True).
        """
        attn_weights = []
        for layer_block in self.layer:
            hidden_states, weights = layer_block(hidden_states)
            if self.vis:
                attn_weights.append(weights)
        encoded = self.encoder_norm(hidden_states)
        return encoded, attn_weights
|
| 245 |
+
|
| 246 |
+
|
| 247 |
+
class Transformer(nn.Module):
    """Embedding stage plus transformer encoder.

    Maps an input image to (token features, attention maps, CNN skip
    features from the hybrid embedding stage).
    """

    def __init__(self, config, img_size, vis):
        super(Transformer, self).__init__()
        self.embeddings = Embeddings(config, img_size=img_size)
        self.encoder = Encoder(config, vis)

    def forward(self, input_ids):
        tokens, skips = self.embeddings(input_ids)
        encoded, attn_weights = self.encoder(tokens)  # (B, n_patch, hidden)
        return encoded, attn_weights, skips
|
| 257 |
+
|
| 258 |
+
|
| 259 |
+
class Conv2dReLU(nn.Sequential):
    """Conv2d -> BatchNorm2d -> ReLU packaged as one Sequential.

    The conv carries a bias only when ``use_batchnorm`` is False, since a
    following batch norm makes the bias redundant.
    NOTE(review): as in the original, BatchNorm2d is included in the
    Sequential regardless of the flag — the flag only toggles the conv bias.
    """

    def __init__(
            self,
            in_channels,
            out_channels,
            kernel_size,
            padding=0,
            stride=1,
            use_batchnorm=True,
    ):
        stages = (
            nn.Conv2d(
                in_channels,
                out_channels,
                kernel_size,
                stride=stride,
                padding=padding,
                bias=not use_batchnorm,
            ),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
        )
        super(Conv2dReLU, self).__init__(*stages)
|
| 282 |
+
|
| 283 |
+
|
| 284 |
+
class DecoderBlock(nn.Module):
    """One decoder stage: upsample x2, optionally concatenate a skip
    feature along channels, then refine with two Conv2dReLU layers."""

    def __init__(
            self,
            in_channels,
            out_channels,
            skip_channels=0,
            use_batchnorm=True,
    ):
        super().__init__()
        fused_channels = in_channels + skip_channels
        self.conv1 = Conv2dReLU(
            fused_channels,
            out_channels,
            kernel_size=3,
            padding=1,
            use_batchnorm=use_batchnorm,
        )
        self.conv2 = Conv2dReLU(
            out_channels,
            out_channels,
            kernel_size=3,
            padding=1,
            use_batchnorm=use_batchnorm,
        )
        self.up = nn.UpsamplingBilinear2d(scale_factor=2)

    def forward(self, x, skip=None):
        upsampled = self.up(x)
        if skip is not None:
            upsampled = torch.cat([upsampled, skip], dim=1)
        return self.conv2(self.conv1(upsampled))
|
| 316 |
+
|
| 317 |
+
|
| 318 |
+
class SegmentationHead(nn.Sequential):
    """Final conv to per-pixel class logits, optionally followed by a
    bilinear upsampling back to full resolution."""

    def __init__(self, in_channels, out_channels, kernel_size=3, upsampling=1):
        conv2d = nn.Conv2d(in_channels, out_channels,
                           kernel_size=kernel_size, padding=kernel_size // 2)
        if upsampling > 1:
            resize = nn.UpsamplingBilinear2d(scale_factor=upsampling)
        else:
            resize = nn.Identity()
        super().__init__(conv2d, resize)
|
| 324 |
+
|
| 325 |
+
|
| 326 |
+
class DecoderCup(nn.Module):
    """Cascaded upsampler: projects transformer tokens back to a 2-D map,
    then upsamples through DecoderBlocks, fusing the CNN skip features.

    Args:
        config: model config providing ``hidden_size``, ``decoder_channels``
            (per-stage output widths), ``n_skip`` and ``skip_channels``.
    """

    def __init__(self, config):
        super().__init__()
        self.config = config
        head_channels = 512
        self.conv_more = Conv2dReLU(
            config.hidden_size,
            head_channels,
            kernel_size=3,
            padding=1,
            use_batchnorm=True,
        )
        decoder_channels = config.decoder_channels
        in_channels = [head_channels] + list(decoder_channels[:-1])
        out_channels = decoder_channels

        if self.config.n_skip != 0:
            # Fix: work on a copy. The original zeroed entries of
            # config.skip_channels *in place*, mutating the shared config
            # object as a side effect of constructing the decoder.
            skip_channels = list(self.config.skip_channels)
            for i in range(4 - self.config.n_skip):  # re-select the skip channels according to n_skip
                skip_channels[3 - i] = 0
        else:
            skip_channels = [0, 0, 0, 0]

        blocks = [
            DecoderBlock(in_ch, out_ch, sk_ch)
            for in_ch, out_ch, sk_ch in zip(in_channels, out_channels, skip_channels)
        ]
        self.blocks = nn.ModuleList(blocks)

    def forward(self, hidden_states, features=None):
        """hidden_states: (B, n_patch, hidden), n_patch a perfect square.
        features: optional list of skip tensors, highest resolution first;
        only the first ``config.n_skip`` entries are used.
        """
        B, n_patch, hidden = hidden_states.size()  # reshape from (B, n_patch, hidden) to (B, h, w, hidden)
        h, w = int(np.sqrt(n_patch)), int(np.sqrt(n_patch))
        x = hidden_states.permute(0, 2, 1)
        x = x.contiguous().view(B, hidden, h, w)
        x = self.conv_more(x)
        for i, decoder_block in enumerate(self.blocks):
            if features is not None:
                skip = features[i] if (i < self.config.n_skip) else None
            else:
                skip = None
            x = decoder_block(x, skip=skip)
        return x
|
| 368 |
+
|
| 369 |
+
|
| 370 |
+
class VisionTransformer(nn.Module):
    """TransUNet: hybrid CNN/transformer encoder, cascaded-upsampler
    decoder (DecoderCup) and a SegmentationHead producing per-pixel logits.

    Note: ``num_classes``/``zero_head`` follow the upstream ViT signature
    and are stored, but the output width actually comes from
    ``config['n_classes']``.
    """

    def __init__(self, config, img_size=224, num_classes=21843, zero_head=False, vis=False):
        super(VisionTransformer, self).__init__()
        self.num_classes = num_classes
        self.zero_head = zero_head
        self.classifier = config.classifier
        self.transformer = Transformer(config, img_size, vis)
        self.decoder = DecoderCup(config)
        self.segmentation_head = SegmentationHead(
            in_channels=config['decoder_channels'][-1],
            out_channels=config['n_classes'],
            kernel_size=3,
        )
        self.config = config

    def forward(self, x):
        # Single-channel inputs are tiled to 4 channels — assumes a
        # 4-channel stem (cf. the *_c4 ResNet variant in this repo);
        # TODO(review): confirm for 3-channel configurations.
        if x.size()[1] == 1:
            x = x.repeat(1,4,1,1)
        x, attn_weights, features = self.transformer(x)  # (B, n_patch, hidden)
        x = self.decoder(x, features)
        logits = self.segmentation_head(x)
        return logits

    def load_from(self, weights):
        """Copy a pretrained JAX/TF checkpoint (dict of numpy arrays) into
        this model: patch embedding, encoder norm, position embeddings
        (grid resized by bilinear zoom when shapes differ), every
        transformer Block, and — for hybrid models — the ResNet stem/body.
        """
        with torch.no_grad():

            res_weight = weights
            self.transformer.embeddings.patch_embeddings.weight.copy_(np2th(weights["embedding/kernel"], conv=True))
            self.transformer.embeddings.patch_embeddings.bias.copy_(np2th(weights["embedding/bias"]))

            self.transformer.encoder.encoder_norm.weight.copy_(np2th(weights["Transformer/encoder_norm/scale"]))
            self.transformer.encoder.encoder_norm.bias.copy_(np2th(weights["Transformer/encoder_norm/bias"]))

            posemb = np2th(weights["Transformer/posembed_input/pos_embedding"])

            posemb_new = self.transformer.embeddings.position_embeddings
            if posemb.size() == posemb_new.size():
                # Exact match: copy as-is.
                self.transformer.embeddings.position_embeddings.copy_(posemb)
            elif posemb.size()[1]-1 == posemb_new.size()[1]:
                # Checkpoint has one extra token (class token); drop it.
                posemb = posemb[:, 1:]
                self.transformer.embeddings.position_embeddings.copy_(posemb)
            else:
                # Grid sizes differ: bilinearly resize the positional grid.
                logger.info("load_pretrained: resized variant: %s to %s" % (posemb.size(), posemb_new.size()))
                ntok_new = posemb_new.size(1)
                if self.classifier == "seg":
                    _, posemb_grid = posemb[:, :1], posemb[0, 1:]
                gs_old = int(np.sqrt(len(posemb_grid)))
                gs_new = int(np.sqrt(ntok_new))
                print('load_pretrained: grid-size from %s to %s' % (gs_old, gs_new))
                posemb_grid = posemb_grid.reshape(gs_old, gs_old, -1)
                zoom = (gs_new / gs_old, gs_new / gs_old, 1)
                posemb_grid = ndimage.zoom(posemb_grid, zoom, order=1)  # th2np
                posemb_grid = posemb_grid.reshape(1, gs_new * gs_new, -1)
                posemb = posemb_grid
                self.transformer.embeddings.position_embeddings.copy_(np2th(posemb))

            # Encoder whole
            # encoder.named_children() yields the Block ModuleList (whose
            # child names "0","1",... become the checkpoint block index).
            for bname, block in self.transformer.encoder.named_children():
                for uname, unit in block.named_children():
                    unit.load_from(weights, n_block=uname)

            if self.transformer.embeddings.hybrid:
                # Hybrid model: also load the ResNet stem and body.
                self.transformer.embeddings.hybrid_model.root.conv.weight.copy_(np2th(res_weight["conv_root/kernel"], conv=True))
                gn_weight = np2th(res_weight["gn_root/scale"]).view(-1)
                gn_bias = np2th(res_weight["gn_root/bias"]).view(-1)
                self.transformer.embeddings.hybrid_model.root.gn.weight.copy_(gn_weight)
                self.transformer.embeddings.hybrid_model.root.gn.bias.copy_(gn_bias)

                for bname, block in self.transformer.embeddings.hybrid_model.body.named_children():
                    for uname, unit in block.named_children():
                        unit.load_from(res_weight, n_block=bname, n_unit=uname)
|
| 441 |
+
|
| 442 |
+
# Registry: model-name string -> ViT configuration object
# (built by the factory functions in vit_seg_configs).
CONFIGS = {
    'ViT-B_16': configs.get_b16_config(),
    'ViT-B_32': configs.get_b32_config(),
    'ViT-L_16': configs.get_l16_config(),
    'ViT-L_32': configs.get_l32_config(),
    'ViT-H_14': configs.get_h14_config(),
    'R50-ViT-B_16': configs.get_r50_b16_config(),
    'R50-ViT-L_16': configs.get_r50_l16_config(),
    'testing': configs.get_testing(),
}
|
| 452 |
+
|
| 453 |
+
|
models/_transunet/vit_seg_modeling_resnet_skip.py
ADDED
|
@@ -0,0 +1,160 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import math
|
| 2 |
+
|
| 3 |
+
from os.path import join as pjoin
|
| 4 |
+
from collections import OrderedDict
|
| 5 |
+
|
| 6 |
+
import torch
|
| 7 |
+
import torch.nn as nn
|
| 8 |
+
import torch.nn.functional as F
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def np2th(weights, conv=False):
    """Convert a numpy weight array to a torch tensor.

    When ``conv=True`` the kernel is assumed to be in TF's HWIO layout and
    is transposed to PyTorch's OIHW layout first.
    """
    array = weights.transpose([3, 2, 0, 1]) if conv else weights
    return torch.from_numpy(array)
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
class StdConv2d(nn.Conv2d):
    """Conv2d with Weight Standardization: each output filter is normalized
    to zero mean / unit variance (eps 1e-5) before the convolution."""

    def forward(self, x):
        var, mean = torch.var_mean(self.weight, dim=[1, 2, 3],
                                   keepdim=True, unbiased=False)
        standardized = (self.weight - mean) / torch.sqrt(var + 1e-5)
        return F.conv2d(x, standardized, self.bias, self.stride,
                        self.padding, self.dilation, self.groups)
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def conv3x3(cin, cout, stride=1, groups=1, bias=False):
    """3x3 weight-standardized convolution with 'same' padding."""
    return StdConv2d(cin, cout, kernel_size=3, padding=1,
                     stride=stride, groups=groups, bias=bias)
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def conv1x1(cin, cout, stride=1, bias=False):
    """1x1 (pointwise) weight-standardized convolution."""
    return StdConv2d(cin, cout, kernel_size=1, padding=0,
                     stride=stride, bias=bias)
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
class PreActBottleneck(nn.Module):
    """Pre-activation (v2) bottleneck block.

    1x1 reduce -> 3x3 (optionally strided) -> 1x1 expand, each on the unit
    branch followed by GroupNorm + ReLU; the residual branch is projected
    (1x1 conv + GroupNorm) whenever stride or channel count changes.
    """

    def __init__(self, cin, cout=None, cmid=None, stride=1):
        # cout defaults to cin; cmid (bottleneck width) defaults to cout//4.
        super().__init__()
        cout = cout or cin
        cmid = cmid or cout//4

        self.gn1 = nn.GroupNorm(32, cmid, eps=1e-6)
        self.conv1 = conv1x1(cin, cmid, bias=False)
        self.gn2 = nn.GroupNorm(32, cmid, eps=1e-6)
        self.conv2 = conv3x3(cmid, cmid, stride, bias=False)  # Original code has it on conv1!!
        self.gn3 = nn.GroupNorm(32, cout, eps=1e-6)
        self.conv3 = conv1x1(cmid, cout, bias=False)
        self.relu = nn.ReLU(inplace=True)

        if (stride != 1 or cin != cout):
            # Projection also with pre-activation according to paper.
            # NOTE(review): GroupNorm(cout, cout) puts each channel in its
            # own group (per-channel norm), unlike the 32-group norms above;
            # this matches the reference TransUNet code, so kept as-is.
            self.downsample = conv1x1(cin, cout, stride, bias=False)
            self.gn_proj = nn.GroupNorm(cout, cout)

    def forward(self, x):

        # Residual branch: projected when present, identity otherwise.
        residual = x
        if hasattr(self, 'downsample'):
            residual = self.downsample(x)
            residual = self.gn_proj(residual)

        # Unit's branch
        y = self.relu(self.gn1(self.conv1(x)))
        y = self.relu(self.gn2(self.conv2(y)))
        y = self.gn3(self.conv3(y))

        y = self.relu(residual + y)
        return y

    def load_from(self, weights, n_block, n_unit):
        # Copy pretrained TF weights keyed "<block>/<unit>/<param>" into
        # this unit; conv kernels are converted HWIO -> OIHW by np2th.
        conv1_weight = np2th(weights[pjoin(n_block, n_unit, "conv1/kernel")], conv=True)
        conv2_weight = np2th(weights[pjoin(n_block, n_unit, "conv2/kernel")], conv=True)
        conv3_weight = np2th(weights[pjoin(n_block, n_unit, "conv3/kernel")], conv=True)

        gn1_weight = np2th(weights[pjoin(n_block, n_unit, "gn1/scale")])
        gn1_bias = np2th(weights[pjoin(n_block, n_unit, "gn1/bias")])

        gn2_weight = np2th(weights[pjoin(n_block, n_unit, "gn2/scale")])
        gn2_bias = np2th(weights[pjoin(n_block, n_unit, "gn2/bias")])

        gn3_weight = np2th(weights[pjoin(n_block, n_unit, "gn3/scale")])
        gn3_bias = np2th(weights[pjoin(n_block, n_unit, "gn3/bias")])

        self.conv1.weight.copy_(conv1_weight)
        self.conv2.weight.copy_(conv2_weight)
        self.conv3.weight.copy_(conv3_weight)

        # TF stores GN scale/bias with extra singleton dims; flatten to 1-D.
        self.gn1.weight.copy_(gn1_weight.view(-1))
        self.gn1.bias.copy_(gn1_bias.view(-1))

        self.gn2.weight.copy_(gn2_weight.view(-1))
        self.gn2.bias.copy_(gn2_bias.view(-1))

        self.gn3.weight.copy_(gn3_weight.view(-1))
        self.gn3.bias.copy_(gn3_bias.view(-1))

        if hasattr(self, 'downsample'):
            proj_conv_weight = np2th(weights[pjoin(n_block, n_unit, "conv_proj/kernel")], conv=True)
            proj_gn_weight = np2th(weights[pjoin(n_block, n_unit, "gn_proj/scale")])
            proj_gn_bias = np2th(weights[pjoin(n_block, n_unit, "gn_proj/bias")])

            self.downsample.weight.copy_(proj_conv_weight)
            self.gn_proj.weight.copy_(proj_gn_weight.view(-1))
            self.gn_proj.bias.copy_(proj_gn_bias.view(-1))
|
| 111 |
+
|
| 112 |
+
class ResNetV2(nn.Module):
    """Implementation of Pre-activation (v2) ResNet mode.

    Used as the hybrid ViT stem: returns the deepest feature map plus the
    intermediate skip features, ordered deepest-first for the decoder.
    """

    def __init__(self, block_units, width_factor):
        super().__init__()
        width = int(64 * width_factor)
        self.width = width

        self.root = nn.Sequential(OrderedDict([
            ('conv', StdConv2d(3, width, kernel_size=7, stride=2, bias=False, padding=3)),
            ('gn', nn.GroupNorm(32, width, eps=1e-6)),
            ('relu', nn.ReLU(inplace=True)),
        ]))
        # Fix: the original constructed a fresh (parameter-free) MaxPool2d
        # inside every forward() call; hoisted here. MaxPool2d has no
        # parameters or buffers, so the state_dict is unchanged.
        self.pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=0)

        self.body = nn.Sequential(OrderedDict([
            ('block1', nn.Sequential(OrderedDict(
                [('unit1', PreActBottleneck(cin=width, cout=width*4, cmid=width))] +
                [(f'unit{i:d}', PreActBottleneck(cin=width*4, cout=width*4, cmid=width)) for i in range(2, block_units[0] + 1)],
            ))),
            ('block2', nn.Sequential(OrderedDict(
                [('unit1', PreActBottleneck(cin=width*4, cout=width*8, cmid=width*2, stride=2))] +
                [(f'unit{i:d}', PreActBottleneck(cin=width*8, cout=width*8, cmid=width*2)) for i in range(2, block_units[1] + 1)],
            ))),
            ('block3', nn.Sequential(OrderedDict(
                [('unit1', PreActBottleneck(cin=width*8, cout=width*16, cmid=width*4, stride=2))] +
                [(f'unit{i:d}', PreActBottleneck(cin=width*16, cout=width*16, cmid=width*4)) for i in range(2, block_units[2] + 1)],
            ))),
        ]))

    def forward(self, x):
        """x: (B, 3, H, W). Returns (deepest feature, skips deepest-first)."""
        features = []
        b, c, in_size, _ = x.size()
        x = self.root(x)
        features.append(x)
        x = self.pool(x)
        for i in range(len(self.body)-1):
            x = self.body[i](x)
            # Expected spatial size at this depth; odd input sizes can come
            # up 1-2 px short after strided convs, so zero-pad bottom/right.
            right_size = int(in_size / 4 / (i+1))
            if x.size()[2] != right_size:
                pad = right_size - x.size()[2]
                assert 0 < pad < 3, "x {} should {}".format(x.size(), right_size)
                feat = torch.zeros((b, x.size()[1], right_size, right_size), device=x.device)
                feat[:, :, 0:x.size()[2], 0:x.size()[3]] = x[:]
            else:
                feat = x
            features.append(feat)
        x = self.body[-1](x)
        return x, features[::-1]
|
models/_transunet/vit_seg_modeling_resnet_skip_c4.py
ADDED
|
@@ -0,0 +1,160 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import math
|
| 2 |
+
|
| 3 |
+
from os.path import join as pjoin
|
| 4 |
+
from collections import OrderedDict
|
| 5 |
+
|
| 6 |
+
import torch
|
| 7 |
+
import torch.nn as nn
|
| 8 |
+
import torch.nn.functional as F
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def np2th(weights, conv=False):
    """Convert a numpy weight array to a torch tensor.

    When ``conv=True`` the kernel is assumed to be in TF's HWIO layout and
    is transposed to PyTorch's OIHW layout first.
    """
    array = weights.transpose([3, 2, 0, 1]) if conv else weights
    return torch.from_numpy(array)
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
class StdConv2d(nn.Conv2d):
    """Conv2d with Weight Standardization: each output filter is normalized
    to zero mean / unit variance (eps 1e-5) before the convolution."""

    def forward(self, x):
        var, mean = torch.var_mean(self.weight, dim=[1, 2, 3],
                                   keepdim=True, unbiased=False)
        standardized = (self.weight - mean) / torch.sqrt(var + 1e-5)
        return F.conv2d(x, standardized, self.bias, self.stride,
                        self.padding, self.dilation, self.groups)
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def conv3x3(cin, cout, stride=1, groups=1, bias=False):
    """3x3 weight-standardized convolution with 'same' padding."""
    return StdConv2d(cin, cout, kernel_size=3, padding=1,
                     stride=stride, groups=groups, bias=bias)
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def conv1x1(cin, cout, stride=1, bias=False):
    """1x1 (pointwise) weight-standardized convolution."""
    return StdConv2d(cin, cout, kernel_size=1, padding=0,
                     stride=stride, bias=bias)
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
class PreActBottleneck(nn.Module):
    """Pre-activation (v2) bottleneck block.

    1x1 reduce -> 3x3 (optionally strided) -> 1x1 expand, each on the unit
    branch followed by GroupNorm + ReLU; the residual branch is projected
    (1x1 conv + GroupNorm) whenever stride or channel count changes.
    """

    def __init__(self, cin, cout=None, cmid=None, stride=1):
        # cout defaults to cin; cmid (bottleneck width) defaults to cout//4.
        super().__init__()
        cout = cout or cin
        cmid = cmid or cout//4

        self.gn1 = nn.GroupNorm(32, cmid, eps=1e-6)
        self.conv1 = conv1x1(cin, cmid, bias=False)
        self.gn2 = nn.GroupNorm(32, cmid, eps=1e-6)
        self.conv2 = conv3x3(cmid, cmid, stride, bias=False)  # Original code has it on conv1!!
        self.gn3 = nn.GroupNorm(32, cout, eps=1e-6)
        self.conv3 = conv1x1(cmid, cout, bias=False)
        self.relu = nn.ReLU(inplace=True)

        if (stride != 1 or cin != cout):
            # Projection also with pre-activation according to paper.
            # NOTE(review): GroupNorm(cout, cout) puts each channel in its
            # own group (per-channel norm), unlike the 32-group norms above;
            # this matches the reference TransUNet code, so kept as-is.
            self.downsample = conv1x1(cin, cout, stride, bias=False)
            self.gn_proj = nn.GroupNorm(cout, cout)

    def forward(self, x):

        # Residual branch: projected when present, identity otherwise.
        residual = x
        if hasattr(self, 'downsample'):
            residual = self.downsample(x)
            residual = self.gn_proj(residual)

        # Unit's branch
        y = self.relu(self.gn1(self.conv1(x)))
        y = self.relu(self.gn2(self.conv2(y)))
        y = self.gn3(self.conv3(y))

        y = self.relu(residual + y)
        return y

    def load_from(self, weights, n_block, n_unit):
        # Copy pretrained TF weights keyed "<block>/<unit>/<param>" into
        # this unit; conv kernels are converted HWIO -> OIHW by np2th.
        conv1_weight = np2th(weights[pjoin(n_block, n_unit, "conv1/kernel")], conv=True)
        conv2_weight = np2th(weights[pjoin(n_block, n_unit, "conv2/kernel")], conv=True)
        conv3_weight = np2th(weights[pjoin(n_block, n_unit, "conv3/kernel")], conv=True)

        gn1_weight = np2th(weights[pjoin(n_block, n_unit, "gn1/scale")])
        gn1_bias = np2th(weights[pjoin(n_block, n_unit, "gn1/bias")])

        gn2_weight = np2th(weights[pjoin(n_block, n_unit, "gn2/scale")])
        gn2_bias = np2th(weights[pjoin(n_block, n_unit, "gn2/bias")])

        gn3_weight = np2th(weights[pjoin(n_block, n_unit, "gn3/scale")])
        gn3_bias = np2th(weights[pjoin(n_block, n_unit, "gn3/bias")])

        self.conv1.weight.copy_(conv1_weight)
        self.conv2.weight.copy_(conv2_weight)
        self.conv3.weight.copy_(conv3_weight)

        # TF stores GN scale/bias with extra singleton dims; flatten to 1-D.
        self.gn1.weight.copy_(gn1_weight.view(-1))
        self.gn1.bias.copy_(gn1_bias.view(-1))

        self.gn2.weight.copy_(gn2_weight.view(-1))
        self.gn2.bias.copy_(gn2_bias.view(-1))

        self.gn3.weight.copy_(gn3_weight.view(-1))
        self.gn3.bias.copy_(gn3_bias.view(-1))

        if hasattr(self, 'downsample'):
            proj_conv_weight = np2th(weights[pjoin(n_block, n_unit, "conv_proj/kernel")], conv=True)
            proj_gn_weight = np2th(weights[pjoin(n_block, n_unit, "gn_proj/scale")])
            proj_gn_bias = np2th(weights[pjoin(n_block, n_unit, "gn_proj/bias")])

            self.downsample.weight.copy_(proj_conv_weight)
            self.gn_proj.weight.copy_(proj_gn_weight.view(-1))
            self.gn_proj.bias.copy_(proj_gn_bias.view(-1))
|
| 111 |
+
|
| 112 |
+
class ResNetV2(nn.Module):
    """Implementation of Pre-activation (v2) ResNet mode.

    4-channel-input variant of the hybrid ViT stem: returns the deepest
    feature map plus the intermediate skip features, deepest-first.
    """

    def __init__(self, block_units, width_factor):
        super().__init__()
        width = int(64 * width_factor)
        self.width = width

        self.root = nn.Sequential(OrderedDict([
            ('conv', StdConv2d(4, width, kernel_size=7, stride=2, bias=False, padding=3)),
            ('gn', nn.GroupNorm(32, width, eps=1e-6)),
            ('relu', nn.ReLU(inplace=True)),
        ]))
        # Fix: the original constructed a fresh (parameter-free) MaxPool2d
        # inside every forward() call; hoisted here. MaxPool2d has no
        # parameters or buffers, so the state_dict is unchanged.
        self.pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=0)

        self.body = nn.Sequential(OrderedDict([
            ('block1', nn.Sequential(OrderedDict(
                [('unit1', PreActBottleneck(cin=width, cout=width*4, cmid=width))] +
                [(f'unit{i:d}', PreActBottleneck(cin=width*4, cout=width*4, cmid=width)) for i in range(2, block_units[0] + 1)],
            ))),
            ('block2', nn.Sequential(OrderedDict(
                [('unit1', PreActBottleneck(cin=width*4, cout=width*8, cmid=width*2, stride=2))] +
                [(f'unit{i:d}', PreActBottleneck(cin=width*8, cout=width*8, cmid=width*2)) for i in range(2, block_units[1] + 1)],
            ))),
            ('block3', nn.Sequential(OrderedDict(
                [('unit1', PreActBottleneck(cin=width*8, cout=width*16, cmid=width*4, stride=2))] +
                [(f'unit{i:d}', PreActBottleneck(cin=width*16, cout=width*16, cmid=width*4)) for i in range(2, block_units[2] + 1)],
            ))),
        ]))

    def forward(self, x):
        """x: (B, 4, H, W). Returns (deepest feature, skips deepest-first)."""
        features = []
        b, c, in_size, _ = x.size()
        x = self.root(x)
        features.append(x)
        x = self.pool(x)
        for i in range(len(self.body)-1):
            x = self.body[i](x)
            # Expected spatial size at this depth; odd input sizes can come
            # up 1-2 px short after strided convs, so zero-pad bottom/right.
            right_size = int(in_size / 4 / (i+1))
            if x.size()[2] != right_size:
                pad = right_size - x.size()[2]
                assert 0 < pad < 3, "x {} should {}".format(x.size(), right_size)
                feat = torch.zeros((b, x.size()[1], right_size, right_size), device=x.device)
                feat[:, :, 0:x.size()[2], 0:x.size()[3]] = x[:]
            else:
                feat = x
            features.append(feat)
        x = self.body[-1](x)
        return x, features[::-1]
|
models/_uctransnet/CTrans.py
ADDED
|
@@ -0,0 +1,365 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- coding: utf-8 -*-
|
| 2 |
+
# @Author : Haonan Wang
|
| 3 |
+
# @File : CTrans.py
|
| 4 |
+
# @Software: PyCharm
|
| 5 |
+
# coding=utf-8
|
| 6 |
+
from __future__ import absolute_import
|
| 7 |
+
from __future__ import division
|
| 8 |
+
from __future__ import print_function
|
| 9 |
+
import copy
|
| 10 |
+
import logging
|
| 11 |
+
import math
|
| 12 |
+
import torch
|
| 13 |
+
import torch.nn as nn
|
| 14 |
+
import numpy as np
|
| 15 |
+
from torch.nn import Dropout, Softmax, Conv2d, LayerNorm
|
| 16 |
+
from torch.nn.modules.utils import _pair
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
logger = logging.getLogger(__name__)
|
| 20 |
+
|
| 21 |
+
class Channel_Embeddings(nn.Module):
    """Construct the embeddings from patch, position embeddings.

    The patch projection keeps the channel count (in_channels ->
    in_channels), so each token has width ``in_channels``.
    """

    def __init__(self, config, patchsize, img_size, in_channels):
        super().__init__()
        img_size = _pair(img_size)
        patch_size = _pair(patchsize)
        n_patches = (img_size[0] // patch_size[0]) * (img_size[1] // patch_size[1])

        self.patch_embeddings = Conv2d(in_channels=in_channels,
                                       out_channels=in_channels,
                                       kernel_size=patch_size,
                                       stride=patch_size)
        self.position_embeddings = nn.Parameter(torch.zeros(1, n_patches, in_channels))
        self.dropout = Dropout(config.transformer["embeddings_dropout_rate"])

    def forward(self, x):
        """x: (B, C, H, W) or None (stage disabled). Returns (B, n_patches, C)."""
        if x is None:
            return None
        patches = self.patch_embeddings(x)             # (B, C, H/ps, W/ps)
        tokens = patches.flatten(2).transpose(-1, -2)  # (B, n_patches, C)
        return self.dropout(tokens + self.position_embeddings)
|
| 46 |
+
|
| 47 |
+
class Reconstruct(nn.Module):
    """Turn a token sequence back into a 2-D feature map: reshape to a
    square grid, upsample by ``scale_factor``, then conv + BN + ReLU."""

    def __init__(self, in_channels, out_channels, kernel_size, scale_factor):
        super(Reconstruct, self).__init__()
        padding = 1 if kernel_size == 3 else 0
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, padding=padding)
        self.norm = nn.BatchNorm2d(out_channels)
        self.activation = nn.ReLU(inplace=True)
        self.scale_factor = scale_factor

    def forward(self, x):
        if x is None:
            return None

        # Tokens -> square map: n_patch must be a perfect square.
        B, n_patch, hidden = x.size()
        side = int(np.sqrt(n_patch))
        grid = x.permute(0, 2, 1).contiguous().view(B, hidden, side, side)
        grid = nn.Upsample(scale_factor=self.scale_factor)(grid)

        return self.activation(self.norm(self.conv(grid)))
|
| 73 |
+
|
| 74 |
+
class Attention_org(nn.Module):
    """Multi-head channel-wise cross attention over four encoder scales.

    Each scale i gets its own per-head query projection (query{i}); keys and
    values are projected from the concatenation of all scales (``emb_all``,
    channel size ``config.KV_size``).  Heads are realised as ModuleLists of
    independent Linear layers rather than one batched projection.  Any
    ``emb{i}`` may be None, in which case that scale is skipped and the
    corresponding output is None.
    """
    def __init__(self, config, vis,channel_num):
        super(Attention_org, self).__init__()
        self.vis = vis                      # keep mean-over-head attention maps for visualisation
        self.KV_size = config.KV_size       # total channels of the concatenated K/V source
        self.channel_num = channel_num      # per-scale channel counts
        self.num_attention_heads = config.transformer["num_heads"]

        self.query1 = nn.ModuleList()
        self.query2 = nn.ModuleList()
        self.query3 = nn.ModuleList()
        self.query4 = nn.ModuleList()
        self.key = nn.ModuleList()
        self.value = nn.ModuleList()

        # One independent Linear per head for every projection.
        for _ in range(config.transformer["num_heads"]):
            query1 = nn.Linear(channel_num[0], channel_num[0], bias=False)
            query2 = nn.Linear(channel_num[1], channel_num[1], bias=False)
            query3 = nn.Linear(channel_num[2], channel_num[2], bias=False)
            query4 = nn.Linear(channel_num[3], channel_num[3], bias=False)
            key = nn.Linear( self.KV_size, self.KV_size, bias=False)
            value = nn.Linear(self.KV_size, self.KV_size, bias=False)
            self.query1.append(copy.deepcopy(query1))
            self.query2.append(copy.deepcopy(query2))
            self.query3.append(copy.deepcopy(query3))
            self.query4.append(copy.deepcopy(query4))
            self.key.append(copy.deepcopy(key))
            self.value.append(copy.deepcopy(value))
        # InstanceNorm across the head dimension of the score tensor.
        self.psi = nn.InstanceNorm2d(self.num_attention_heads)
        self.softmax = Softmax(dim=3)
        self.out1 = nn.Linear(channel_num[0], channel_num[0], bias=False)
        self.out2 = nn.Linear(channel_num[1], channel_num[1], bias=False)
        self.out3 = nn.Linear(channel_num[2], channel_num[2], bias=False)
        self.out4 = nn.Linear(channel_num[3], channel_num[3], bias=False)
        self.attn_dropout = Dropout(config.transformer["attention_dropout_rate"])
        self.proj_dropout = Dropout(config.transformer["attention_dropout_rate"])



    def forward(self, emb1,emb2,emb3,emb4, emb_all):
        """Cross-attend each scale's tokens against the fused ``emb_all``.

        Returns (O1, O2, O3, O4, weights); O_i is None when emb_i is None;
        ``weights`` is a list of mean-over-head attention maps when
        ``self.vis`` is True, else None.
        """
        multi_head_Q1_list = []
        multi_head_Q2_list = []
        multi_head_Q3_list = []
        multi_head_Q4_list = []
        multi_head_K_list = []
        multi_head_V_list = []
        if emb1 is not None:
            for query1 in self.query1:
                Q1 = query1(emb1)
                multi_head_Q1_list.append(Q1)
        if emb2 is not None:
            for query2 in self.query2:
                Q2 = query2(emb2)
                multi_head_Q2_list.append(Q2)
        if emb3 is not None:
            for query3 in self.query3:
                Q3 = query3(emb3)
                multi_head_Q3_list.append(Q3)
        if emb4 is not None:
            for query4 in self.query4:
                Q4 = query4(emb4)
                multi_head_Q4_list.append(Q4)
        # K and V always come from the fused embedding, regardless of which
        # per-scale queries are present.
        for key in self.key:
            K = key(emb_all)
            multi_head_K_list.append(K)
        for value in self.value:
            V = value(emb_all)
            multi_head_V_list.append(V)
        # print(len(multi_head_Q4_list))

        # Stack per-head results into a new head dimension at dim=1.
        multi_head_Q1 = torch.stack(multi_head_Q1_list, dim=1) if emb1 is not None else None
        multi_head_Q2 = torch.stack(multi_head_Q2_list, dim=1) if emb2 is not None else None
        multi_head_Q3 = torch.stack(multi_head_Q3_list, dim=1) if emb3 is not None else None
        multi_head_Q4 = torch.stack(multi_head_Q4_list, dim=1) if emb4 is not None else None
        multi_head_K = torch.stack(multi_head_K_list, dim=1)
        multi_head_V = torch.stack(multi_head_V_list, dim=1)

        multi_head_Q1 = multi_head_Q1.transpose(-1, -2) if emb1 is not None else None
        multi_head_Q2 = multi_head_Q2.transpose(-1, -2) if emb2 is not None else None
        multi_head_Q3 = multi_head_Q3.transpose(-1, -2) if emb3 is not None else None
        multi_head_Q4 = multi_head_Q4.transpose(-1, -2) if emb4 is not None else None

        # Channel-attention scores: Q^T x K, scaled by sqrt(KV_size).
        attention_scores1 = torch.matmul(multi_head_Q1, multi_head_K) if emb1 is not None else None
        attention_scores2 = torch.matmul(multi_head_Q2, multi_head_K) if emb2 is not None else None
        attention_scores3 = torch.matmul(multi_head_Q3, multi_head_K) if emb3 is not None else None
        attention_scores4 = torch.matmul(multi_head_Q4, multi_head_K) if emb4 is not None else None

        attention_scores1 = attention_scores1 / math.sqrt(self.KV_size) if emb1 is not None else None
        attention_scores2 = attention_scores2 / math.sqrt(self.KV_size) if emb2 is not None else None
        attention_scores3 = attention_scores3 / math.sqrt(self.KV_size) if emb3 is not None else None
        attention_scores4 = attention_scores4 / math.sqrt(self.KV_size) if emb4 is not None else None

        # Normalise across heads (psi) before the softmax over dim=3.
        attention_probs1 = self.softmax(self.psi(attention_scores1)) if emb1 is not None else None
        attention_probs2 = self.softmax(self.psi(attention_scores2)) if emb2 is not None else None
        attention_probs3 = self.softmax(self.psi(attention_scores3)) if emb3 is not None else None
        attention_probs4 = self.softmax(self.psi(attention_scores4)) if emb4 is not None else None
        # print(attention_probs4.size())

        if self.vis:
            # NOTE(review): if any emb_i is None while vis is True, the
            # corresponding attention_probs_i is None and .mean(1) would
            # raise — callers appear to always pass all four scales; confirm
            # before relying on partial inputs with vis=True.
            weights = []
            weights.append(attention_probs1.mean(1))
            weights.append(attention_probs2.mean(1))
            weights.append(attention_probs3.mean(1))
            weights.append(attention_probs4.mean(1))
        else: weights=None

        attention_probs1 = self.attn_dropout(attention_probs1) if emb1 is not None else None
        attention_probs2 = self.attn_dropout(attention_probs2) if emb2 is not None else None
        attention_probs3 = self.attn_dropout(attention_probs3) if emb3 is not None else None
        attention_probs4 = self.attn_dropout(attention_probs4) if emb4 is not None else None

        multi_head_V = multi_head_V.transpose(-1, -2)
        context_layer1 = torch.matmul(attention_probs1, multi_head_V) if emb1 is not None else None
        context_layer2 = torch.matmul(attention_probs2, multi_head_V) if emb2 is not None else None
        context_layer3 = torch.matmul(attention_probs3, multi_head_V) if emb3 is not None else None
        context_layer4 = torch.matmul(attention_probs4, multi_head_V) if emb4 is not None else None

        # Reorder axes, then average over what is now dim=3 to merge heads.
        context_layer1 = context_layer1.permute(0, 3, 2, 1).contiguous() if emb1 is not None else None
        context_layer2 = context_layer2.permute(0, 3, 2, 1).contiguous() if emb2 is not None else None
        context_layer3 = context_layer3.permute(0, 3, 2, 1).contiguous() if emb3 is not None else None
        context_layer4 = context_layer4.permute(0, 3, 2, 1).contiguous() if emb4 is not None else None
        context_layer1 = context_layer1.mean(dim=3) if emb1 is not None else None
        context_layer2 = context_layer2.mean(dim=3) if emb2 is not None else None
        context_layer3 = context_layer3.mean(dim=3) if emb3 is not None else None
        context_layer4 = context_layer4.mean(dim=3) if emb4 is not None else None

        # Per-scale output projections with dropout.
        O1 = self.out1(context_layer1) if emb1 is not None else None
        O2 = self.out2(context_layer2) if emb2 is not None else None
        O3 = self.out3(context_layer3) if emb3 is not None else None
        O4 = self.out4(context_layer4) if emb4 is not None else None
        O1 = self.proj_dropout(O1) if emb1 is not None else None
        O2 = self.proj_dropout(O2) if emb2 is not None else None
        O3 = self.proj_dropout(O3) if emb3 is not None else None
        O4 = self.proj_dropout(O4) if emb4 is not None else None
        return O1,O2,O3,O4, weights
|
| 209 |
+
|
| 210 |
+
|
| 211 |
+
|
| 212 |
+
|
| 213 |
+
class Mlp(nn.Module):
    """Position-wise feed-forward block: Linear -> GELU -> Dropout -> Linear -> Dropout.

    ``config.transformer["dropout_rate"]`` sets both dropout layers; the
    output channel count equals the input channel count.
    """

    def __init__(self,config, in_channel, mlp_channel):
        super(Mlp, self).__init__()
        self.fc1 = nn.Linear(in_channel, mlp_channel)
        self.fc2 = nn.Linear(mlp_channel, in_channel)
        self.act_fn = nn.GELU()
        self.dropout = Dropout(config.transformer["dropout_rate"])
        self._init_weights()

    def _init_weights(self):
        # Xavier-uniform weights first (same RNG draw order as before),
        # then near-zero normal biases for both projections.
        for fc in (self.fc1, self.fc2):
            nn.init.xavier_uniform_(fc.weight)
        for fc in (self.fc1, self.fc2):
            nn.init.normal_(fc.bias, std=1e-6)

    def forward(self, x):
        hidden = self.dropout(self.act_fn(self.fc1(x)))
        return self.dropout(self.fc2(hidden))
|
| 235 |
+
|
| 236 |
+
class Block_ViT(nn.Module):
    """One channel-wise cross-attention transformer layer.

    Pre-norm multi-scale channel attention with a residual connection,
    followed by a per-scale MLP with a second residual.  Any of the four
    embeddings may be None; that scale is then skipped and its output is
    None.

    Fix: the original built ``emb_all`` by looking up ``locals()["emb"+i]``,
    which is fragile (depends on CPython's locals() snapshot semantics and
    breaks under renaming); replaced with an explicit tuple filter.
    """
    def __init__(self, config, vis, channel_num):
        super(Block_ViT, self).__init__()
        expand_ratio = config.expand_ratio  # hidden-width multiplier of each Mlp
        self.attn_norm1 = LayerNorm(channel_num[0],eps=1e-6)
        self.attn_norm2 = LayerNorm(channel_num[1],eps=1e-6)
        self.attn_norm3 = LayerNorm(channel_num[2],eps=1e-6)
        self.attn_norm4 = LayerNorm(channel_num[3],eps=1e-6)
        # Norm for the concatenated K/V token stream (KV_size channels).
        self.attn_norm = LayerNorm(config.KV_size,eps=1e-6)
        self.channel_attn = Attention_org(config, vis, channel_num)

        self.ffn_norm1 = LayerNorm(channel_num[0],eps=1e-6)
        self.ffn_norm2 = LayerNorm(channel_num[1],eps=1e-6)
        self.ffn_norm3 = LayerNorm(channel_num[2],eps=1e-6)
        self.ffn_norm4 = LayerNorm(channel_num[3],eps=1e-6)
        self.ffn1 = Mlp(config,channel_num[0],channel_num[0]*expand_ratio)
        self.ffn2 = Mlp(config,channel_num[1],channel_num[1]*expand_ratio)
        self.ffn3 = Mlp(config,channel_num[2],channel_num[2]*expand_ratio)
        self.ffn4 = Mlp(config,channel_num[3],channel_num[3]*expand_ratio)


    def forward(self, emb1,emb2,emb3,emb4):
        """Return (x1, x2, x3, x4, weights); x_i is None when emb_i is None."""
        org1 = emb1
        org2 = emb2
        org3 = emb3
        org4 = emb4
        # Concatenate the available embeddings along the channel axis (dim=2)
        # to form the shared key/value source.
        embcat = [emb for emb in (emb1, emb2, emb3, emb4) if emb is not None]

        emb_all = torch.cat(embcat,dim=2)
        cx1 = self.attn_norm1(emb1) if emb1 is not None else None
        cx2 = self.attn_norm2(emb2) if emb2 is not None else None
        cx3 = self.attn_norm3(emb3) if emb3 is not None else None
        cx4 = self.attn_norm4(emb4) if emb4 is not None else None
        emb_all = self.attn_norm(emb_all)
        cx1,cx2,cx3,cx4, weights = self.channel_attn(cx1,cx2,cx3,cx4,emb_all)
        # Residual around the attention sub-layer.
        cx1 = org1 + cx1 if emb1 is not None else None
        cx2 = org2 + cx2 if emb2 is not None else None
        cx3 = org3 + cx3 if emb3 is not None else None
        cx4 = org4 + cx4 if emb4 is not None else None

        org1 = cx1
        org2 = cx2
        org3 = cx3
        org4 = cx4
        x1 = self.ffn_norm1(cx1) if emb1 is not None else None
        x2 = self.ffn_norm2(cx2) if emb2 is not None else None
        x3 = self.ffn_norm3(cx3) if emb3 is not None else None
        x4 = self.ffn_norm4(cx4) if emb4 is not None else None
        x1 = self.ffn1(x1) if emb1 is not None else None
        x2 = self.ffn2(x2) if emb2 is not None else None
        x3 = self.ffn3(x3) if emb3 is not None else None
        x4 = self.ffn4(x4) if emb4 is not None else None
        # Residual around the feed-forward sub-layer.
        x1 = x1 + org1 if emb1 is not None else None
        x2 = x2 + org2 if emb2 is not None else None
        x3 = x3 + org3 if emb3 is not None else None
        x4 = x4 + org4 if emb4 is not None else None

        return x1, x2, x3, x4, weights
|
| 299 |
+
|
| 300 |
+
|
| 301 |
+
class Encoder(nn.Module):
    """Stack of ``num_layers`` Block_ViT layers with a final per-scale LayerNorm.

    Embeddings that arrive as None pass through as None.  When ``vis`` is
    True, per-layer attention weights are collected and returned.
    """
    def __init__(self, config, vis, channel_num):
        super(Encoder, self).__init__()
        self.vis = vis
        self.layer = nn.ModuleList()
        self.encoder_norm1 = LayerNorm(channel_num[0],eps=1e-6)
        self.encoder_norm2 = LayerNorm(channel_num[1],eps=1e-6)
        self.encoder_norm3 = LayerNorm(channel_num[2],eps=1e-6)
        self.encoder_norm4 = LayerNorm(channel_num[3],eps=1e-6)
        for _ in range(config.transformer["num_layers"]):
            self.layer.append(copy.deepcopy(Block_ViT(config, vis, channel_num)))

    def forward(self, emb1,emb2,emb3,emb4):
        attn_weights = []
        for layer_block in self.layer:
            emb1, emb2, emb3, emb4, weights = layer_block(emb1, emb2, emb3, emb4)
            if self.vis:
                attn_weights.append(weights)
        # Final normalisation of whichever scales are present.
        norms = (self.encoder_norm1, self.encoder_norm2,
                 self.encoder_norm3, self.encoder_norm4)
        emb1, emb2, emb3, emb4 = (
            norm(emb) if emb is not None else None
            for norm, emb in zip(norms, (emb1, emb2, emb3, emb4))
        )
        return emb1,emb2,emb3,emb4, attn_weights
|
| 325 |
+
|
| 326 |
+
|
| 327 |
+
class ChannelTransformer(nn.Module):
    """Channel-wise transformer bridging the four U-Net encoder scales.

    Each scale is patch-embedded (coarser patches at finer scales), jointly
    encoded by ``Encoder``, reconstructed back to a feature map, and added
    to its original encoder feature as a residual.
    """
    def __init__(self, config, vis, img_size, channel_num=[64, 128, 256, 512], patchSize=[32, 16, 8, 4]):
        super().__init__()

        self.patchSize_1 = patchSize[0]
        self.patchSize_2 = patchSize[1]
        self.patchSize_3 = patchSize[2]
        self.patchSize_4 = patchSize[3]
        # Patch embedding per scale; spatial size halves at every scale.
        self.embeddings_1 = Channel_Embeddings(config,self.patchSize_1, img_size=img_size, in_channels=channel_num[0])
        self.embeddings_2 = Channel_Embeddings(config,self.patchSize_2, img_size=img_size//2, in_channels=channel_num[1])
        self.embeddings_3 = Channel_Embeddings(config,self.patchSize_3, img_size=img_size//4, in_channels=channel_num[2])
        self.embeddings_4 = Channel_Embeddings(config,self.patchSize_4, img_size=img_size//8, in_channels=channel_num[3])
        self.encoder = Encoder(config, vis, channel_num)

        self.reconstruct_1 = Reconstruct(channel_num[0], channel_num[0], kernel_size=1,scale_factor=(self.patchSize_1,self.patchSize_1))
        self.reconstruct_2 = Reconstruct(channel_num[1], channel_num[1], kernel_size=1,scale_factor=(self.patchSize_2,self.patchSize_2))
        self.reconstruct_3 = Reconstruct(channel_num[2], channel_num[2], kernel_size=1,scale_factor=(self.patchSize_3,self.patchSize_3))
        self.reconstruct_4 = Reconstruct(channel_num[3], channel_num[3], kernel_size=1,scale_factor=(self.patchSize_4,self.patchSize_4))

    def forward(self,en1,en2,en3,en4):
        # Tokenise each encoder scale into patch embeddings.
        emb1 = self.embeddings_1(en1)
        emb2 = self.embeddings_2(en2)
        emb3 = self.embeddings_3(en3)
        emb4 = self.embeddings_4(en4)

        enc1, enc2, enc3, enc4, attn_weights = self.encoder(emb1, emb2, emb3, emb4)  # (B, n_patch, hidden)

        # Fold tokens back to feature maps and add the encoder feature as a
        # residual; a scale whose input was None stays None.
        outputs = []
        recs = (self.reconstruct_1, self.reconstruct_2,
                self.reconstruct_3, self.reconstruct_4)
        for rec, enc, skip in zip(recs, (enc1, enc2, enc3, enc4), (en1, en2, en3, en4)):
            outputs.append(rec(enc) + skip if skip is not None else None)
        x1, x2, x3, x4 = outputs

        return x1, x2, x3, x4, attn_weights
|
| 365 |
+
|
models/_uctransnet/Config.py
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- coding: utf-8 -*-
# @Time : 2021/6/19 2:44 PM
# @Author : Haonan Wang
# @File : Config.py
# @Software: PyCharm
import os
import torch
import time
import ml_collections

## PARAMETERS OF THE MODEL
save_model = True        # persist model checkpoints during training
tensorboard = True       # write TensorBoard logs
# NOTE: both environment variables are set at import time, as a side effect
# of importing this module.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
use_cuda = torch.cuda.is_available()
seed = 666
os.environ['PYTHONHASHSEED'] = str(seed)

cosineLR = True # whether use cosineLR or not
n_channels = 3           # input image channels (RGB)
n_labels = 1             # single-channel (binary) segmentation output
epochs = 2000
img_size = 224
print_frequency = 1      # log every N iterations
save_frequency = 5000    # checkpoint every N iterations
vis_frequency = 10       # visualise validation output every N epochs
early_stopping_patience = 50

pretrain = False
task_name = 'MoNuSeg' # GlaS MoNuSeg
# task_name = 'GlaS'
learning_rate = 1e-3
batch_size = 4


# model_name = 'UCTransNet'
model_name = 'UCTransNet_pretrain'

# Dataset and output paths are all derived from task_name / model_name.
train_dataset = './datasets/'+ task_name+ '/Train_Folder/'
val_dataset = './datasets/'+ task_name+ '/Val_Folder/'
test_dataset = './datasets/'+ task_name+ '/Test_Folder/'
session_name = 'Test_session' + '_' + time.strftime('%m.%d_%Hh%M')
save_path = task_name +'/'+ model_name +'/' + session_name + '/'
model_path = save_path + 'models/'
tensorboard_folder = save_path + 'tensorboard_logs/'
logger_path = save_path + session_name + ".log"
visualize_path = save_path + 'visualize_val/'


##########################################################################
# CTrans configs
##########################################################################
def get_CTranS_config():
    """Build the ml_collections ConfigDict consumed by CTrans/UCTransNet."""
    config = ml_collections.ConfigDict()
    config.transformer = ml_collections.ConfigDict()
    config.KV_size = 960 # KV_size = Q1 + Q2 + Q3 + Q4
    config.transformer.num_heads = 4
    config.transformer.num_layers = 4
    config.expand_ratio = 4 # MLP channel dimension expand ratio
    config.transformer.embeddings_dropout_rate = 0.1
    config.transformer.attention_dropout_rate = 0.1
    config.transformer.dropout_rate = 0
    # Per-scale patch sizes, coarsest first (matches ChannelTransformer).
    config.patch_sizes = [16,8,4,2]
    config.base_channel = 64 # base channel of U-Net
    config.n_classes = 1
    return config




# used in testing phase, copy the session name in training phase
test_session = "Test_session_07.03_20h39"
|
models/_uctransnet/UCTransNet.py
ADDED
|
@@ -0,0 +1,139 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- coding: utf-8 -*-
|
| 2 |
+
# @Time : 2021/7/8 8:59 上午
|
| 3 |
+
# @File : UCTransNet.py
|
| 4 |
+
# @Software: PyCharm
|
| 5 |
+
import torch.nn as nn
|
| 6 |
+
import torch
|
| 7 |
+
import torch.nn.functional as F
|
| 8 |
+
from .CTrans import ChannelTransformer
|
| 9 |
+
|
| 10 |
+
def get_activation(activation_type):
    """Return an activation module looked up on torch.nn by lower-cased name.

    Falls back to ReLU when no attribute of that (lower-cased) name exists
    on torch.nn.
    """
    name = activation_type.lower()
    return getattr(nn, name)() if hasattr(nn, name) else nn.ReLU()
|
| 16 |
+
|
| 17 |
+
def _make_nConv(in_channels, out_channels, nb_Conv, activation='ReLU'):
    """Build ``nb_Conv`` chained ConvBatchNorm blocks as one nn.Sequential.

    The first block maps in_channels -> out_channels; the remaining
    ``nb_Conv - 1`` blocks keep out_channels.
    """
    blocks = [ConvBatchNorm(in_channels, out_channels, activation)]
    blocks.extend(
        ConvBatchNorm(out_channels, out_channels, activation)
        for _ in range(nb_Conv - 1)
    )
    return nn.Sequential(*blocks)
|
| 24 |
+
|
| 25 |
+
class ConvBatchNorm(nn.Module):
    """(convolution => [BN] => activation), 3x3 same-padding conv."""

    def __init__(self, in_channels, out_channels, activation='ReLU'):
        super(ConvBatchNorm, self).__init__()
        self.conv = nn.Conv2d(in_channels, out_channels,
                              kernel_size=3, padding=1)
        self.norm = nn.BatchNorm2d(out_channels)
        self.activation = get_activation(activation)

    def forward(self, x):
        # conv -> batch norm -> activation, fused into one expression.
        return self.activation(self.norm(self.conv(x)))
|
| 39 |
+
|
| 40 |
+
class DownBlock(nn.Module):
    """Downscaling stage: 2x2 max-pool followed by nb_Conv ConvBatchNorms."""

    def __init__(self, in_channels, out_channels, nb_Conv, activation='ReLU'):
        super(DownBlock, self).__init__()
        self.maxpool = nn.MaxPool2d(2)
        self.nConvs = _make_nConv(in_channels, out_channels, nb_Conv, activation)

    def forward(self, x):
        return self.nConvs(self.maxpool(x))
|
| 50 |
+
|
| 51 |
+
class Flatten(nn.Module):
    """Collapse every dimension except the batch dimension."""

    def forward(self, inputs):
        batch = inputs.size(0)
        return inputs.view(batch, -1)
|
| 54 |
+
|
| 55 |
+
class CCA(nn.Module):
    """Channel-wise Cross Attention gate.

    Gates the skip feature ``x`` with a sigmoid of the averaged channel
    descriptors of ``x`` and the decoder feature ``g`` (global average pool
    followed by a Linear each), then applies ReLU.
    """
    def __init__(self, F_g, F_x):
        super().__init__()
        self.mlp_x = nn.Sequential(
            Flatten(),
            nn.Linear(F_x, F_x))
        self.mlp_g = nn.Sequential(
            Flatten(),
            nn.Linear(F_g, F_x))
        self.relu = nn.ReLU(inplace=True)

    def forward(self, g, x):
        # Global-average-pool each input down to one value per channel.
        descriptor_x = self.mlp_x(
            F.avg_pool2d(x, (x.size(2), x.size(3)), stride=(x.size(2), x.size(3))))
        descriptor_g = self.mlp_g(
            F.avg_pool2d(g, (g.size(2), g.size(3)), stride=(g.size(2), g.size(3))))
        # Average the two descriptors and squash into a (0, 1) channel gate.
        gate = torch.sigmoid((descriptor_x + descriptor_g) / 2.0)
        scale = gate.unsqueeze(2).unsqueeze(3).expand_as(x)
        return self.relu(x * scale)
|
| 80 |
+
|
| 81 |
+
class UpBlock_attention(nn.Module):
    """Decoder stage: upsample x2, CCA-gate the skip, concat, then convs."""

    def __init__(self, in_channels, out_channels, nb_Conv, activation='ReLU'):
        super().__init__()
        self.up = nn.Upsample(scale_factor=2)
        self.coatt = CCA(F_g=in_channels//2, F_x=in_channels//2)
        self.nConvs = _make_nConv(in_channels, out_channels, nb_Conv, activation)

    def forward(self, x, skip_x):
        upsampled = self.up(x)
        gated_skip = self.coatt(g=upsampled, x=skip_x)
        # dim 1 is the channel dimension
        merged = torch.cat([gated_skip, upsampled], dim=1)
        return self.nConvs(merged)
|
| 93 |
+
|
| 94 |
+
class UCTransNet(nn.Module):
    """U-Net with a ChannelTransformer (CTrans) bridging the skip connections.

    For n_classes == 1 a Sigmoid head is applied (BCELoss setup); otherwise
    raw logits are returned.  When ``vis`` is True, forward also returns the
    transformer attention weights.
    """
    def __init__(self, config,n_channels=3, n_classes=1,img_size=224,vis=False):
        super().__init__()
        self.vis = vis
        self.n_channels = n_channels
        self.n_classes = n_classes
        base = config.base_channel
        self.inc = ConvBatchNorm(n_channels, base)
        self.down1 = DownBlock(base, base*2, nb_Conv=2)
        self.down2 = DownBlock(base*2, base*4, nb_Conv=2)
        self.down3 = DownBlock(base*4, base*8, nb_Conv=2)
        self.down4 = DownBlock(base*8, base*8, nb_Conv=2)
        self.mtc = ChannelTransformer(config, vis, img_size,
                                      channel_num=[base, base*2, base*4, base*8],
                                      patchSize=config.patch_sizes)
        self.up4 = UpBlock_attention(base*16, base*4, nb_Conv=2)
        self.up3 = UpBlock_attention(base*8, base*2, nb_Conv=2)
        self.up2 = UpBlock_attention(base*4, base, nb_Conv=2)
        self.up1 = UpBlock_attention(base*2, base, nb_Conv=2)
        self.outc = nn.Conv2d(base, n_classes, kernel_size=(1,1), stride=(1,1))
        self.last_activation = nn.Sigmoid() # if using BCELoss

    def forward(self, x):
        x = x.float()  # guard against non-float inputs (e.g. uint8 images)
        # Encoder path.
        f1 = self.inc(x)
        f2 = self.down1(f1)
        f3 = self.down2(f2)
        f4 = self.down3(f3)
        bottleneck = self.down4(f4)
        # Transformer refines the four skip features.
        f1, f2, f3, f4, att_weights = self.mtc(f1, f2, f3, f4)
        # Decoder path with attention-gated skips.
        d = self.up4(bottleneck, f4)
        d = self.up3(d, f3)
        d = self.up2(d, f2)
        d = self.up1(d, f1)
        head = self.outc(d)
        logits = self.last_activation(head) if self.n_classes == 1 else head
        if self.vis: # visualize the attention maps
            return logits, att_weights
        return logits
|
| 136 |
+
|
| 137 |
+
|
| 138 |
+
|
| 139 |
+
|
models/_uctransnet/UNet.py
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch.nn as nn
|
| 2 |
+
import torch
|
| 3 |
+
|
| 4 |
+
def get_activation(activation_type):
    """Instantiate an activation from torch.nn by lower-cased attribute name.

    Unknown names fall back to nn.ReLU().
    """
    lookup = activation_type.lower()
    if not hasattr(nn, lookup):
        return nn.ReLU()
    return getattr(nn, lookup)()
|
| 10 |
+
|
| 11 |
+
def _make_nConv(in_channels, out_channels, nb_Conv, activation='ReLU'):
    """Chain ``nb_Conv`` ConvBatchNorm blocks; only the first changes width."""
    convs = [ConvBatchNorm(in_channels, out_channels, activation)]
    convs += [
        ConvBatchNorm(out_channels, out_channels, activation)
        for _ in range(nb_Conv - 1)
    ]
    return nn.Sequential(*convs)
|
| 18 |
+
|
| 19 |
+
class ConvBatchNorm(nn.Module):
    """(convolution => [BN] => activation) with a 3x3 same-padding conv."""

    def __init__(self, in_channels, out_channels, activation='ReLU'):
        super(ConvBatchNorm, self).__init__()
        self.conv = nn.Conv2d(in_channels, out_channels,
                              kernel_size=3, padding=1)
        self.norm = nn.BatchNorm2d(out_channels)
        self.activation = get_activation(activation)

    def forward(self, x):
        return self.activation(self.norm(self.conv(x)))
|
| 33 |
+
|
| 34 |
+
class DownBlock(nn.Module):
    """Downscaling with maxpool followed by nb_Conv convolution blocks."""

    def __init__(self, in_channels, out_channels, nb_Conv, activation='ReLU'):
        super(DownBlock, self).__init__()
        self.maxpool = nn.MaxPool2d(2)
        self.nConvs = _make_nConv(in_channels, out_channels, nb_Conv, activation)

    def forward(self, x):
        pooled = self.maxpool(x)
        return self.nConvs(pooled)
|
| 45 |
+
|
| 46 |
+
class UpBlock(nn.Module):
    """Upscaling (transposed conv x2) then skip-concat and conv blocks."""

    def __init__(self, in_channels, out_channels, nb_Conv, activation='ReLU'):
        super(UpBlock, self).__init__()

        # self.up = nn.Upsample(scale_factor=2)
        self.up = nn.ConvTranspose2d(in_channels//2,in_channels//2,(2,2),2)
        self.nConvs = _make_nConv(in_channels, out_channels, nb_Conv, activation)

    def forward(self, x, skip_x):
        upsampled = self.up(x)
        # dim 1 is the channel dimension
        merged = torch.cat([upsampled, skip_x], dim=1)
        return self.nConvs(merged)
|
| 60 |
+
|
| 61 |
+
class UNet(nn.Module):
    """Plain U-Net with a 64-channel stem and four down/up stages.

    n_channels: channels of the input (3 for RGB by default).
    n_classes : output channels; a Sigmoid head is used when n_classes == 1
                (BCELoss setup), otherwise raw logits are returned.
    """
    def __init__(self, n_channels=3, n_classes=9):
        super().__init__()
        self.n_channels = n_channels
        self.n_classes = n_classes
        width = 64  # stem width; doubled at each downsampling stage
        self.inc = ConvBatchNorm(n_channels, width)
        self.down1 = DownBlock(width, width*2, nb_Conv=2)
        self.down2 = DownBlock(width*2, width*4, nb_Conv=2)
        self.down3 = DownBlock(width*4, width*8, nb_Conv=2)
        self.down4 = DownBlock(width*8, width*8, nb_Conv=2)
        self.up4 = UpBlock(width*16, width*4, nb_Conv=2)
        self.up3 = UpBlock(width*8, width*2, nb_Conv=2)
        self.up2 = UpBlock(width*4, width, nb_Conv=2)
        self.up1 = UpBlock(width*2, width, nb_Conv=2)
        self.outc = nn.Conv2d(width, n_classes, kernel_size=(1,1))
        self.last_activation = nn.Sigmoid() if n_classes == 1 else None

    def forward(self, x):
        x = x.float()  # tolerate non-float inputs
        skip1 = self.inc(x)
        skip2 = self.down1(skip1)
        skip3 = self.down2(skip2)
        skip4 = self.down3(skip3)
        bottom = self.down4(skip4)
        out = self.up4(bottom, skip4)
        out = self.up3(out, skip3)
        out = self.up2(out, skip2)
        out = self.up1(out, skip1)
        head = self.outc(out)
        if self.last_activation is not None:
            return self.last_activation(head)
        return head
|
| 110 |
+
|
| 111 |
+
|
models/attunet.py
ADDED
|
@@ -0,0 +1,427 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# https://github.com/LeeJunHyun/Image_Segmentation
|
| 2 |
+
|
| 3 |
+
import torch
|
| 4 |
+
import torch.nn as nn
|
| 5 |
+
import torch.nn.functional as F
|
| 6 |
+
from torch.nn import init
|
| 7 |
+
|
| 8 |
+
def init_weights(net, init_type='normal', gain=0.02):
    """Initialise the conv/linear/batch-norm weights of ``net`` in place.

    Args:
        net: module whose sub-modules are initialised via ``net.apply``.
        init_type: one of ``'normal'``, ``'xavier'``, ``'kaiming'``,
            ``'orthogonal'``.
        gain: std for ``'normal'``, gain for ``'xavier'``/``'orthogonal'``;
            unused by ``'kaiming'`` (matching the reference implementation).

    Raises:
        NotImplementedError: if ``init_type`` is not recognised.
    """
    def init_func(m):
        classname = m.__class__.__name__
        if hasattr(m, 'weight') and (classname.find('Conv') != -1 or classname.find('Linear') != -1):
            if init_type == 'normal':
                init.normal_(m.weight.data, 0.0, gain)
            elif init_type == 'xavier':
                init.xavier_normal_(m.weight.data, gain=gain)
            elif init_type == 'kaiming':
                init.kaiming_normal_(m.weight.data, a=0, mode='fan_in')
            elif init_type == 'orthogonal':
                init.orthogonal_(m.weight.data, gain=gain)
            else:
                raise NotImplementedError('initialization method [%s] is not implemented' % init_type)
            if hasattr(m, 'bias') and m.bias is not None:
                init.constant_(m.bias.data, 0.0)
        elif classname.find('BatchNorm2d') != -1:
            # Fix: a BatchNorm2d created with affine=False has weight/bias set
            # to None; the original code crashed on ``None.data`` here.
            if m.weight is not None:
                init.normal_(m.weight.data, 1.0, gain)
            if m.bias is not None:
                init.constant_(m.bias.data, 0.0)

    print('initialize network with %s' % init_type)
    net.apply(init_func)
|
| 30 |
+
|
| 31 |
+
class conv_block(nn.Module):
    """Two stacked 3x3 Conv-BN-ReLU stages (the standard U-Net double conv).

    Spatial size is preserved (stride 1, padding 1); channels go
    ``ch_in -> ch_out -> ch_out``.
    """

    def __init__(self, ch_in, ch_out):
        super(conv_block, self).__init__()
        stages = [
            nn.Conv2d(ch_in, ch_out, kernel_size=3, stride=1, padding=1, bias=True),
            nn.BatchNorm2d(ch_out),
            nn.ReLU(inplace=True),
            nn.Conv2d(ch_out, ch_out, kernel_size=3, stride=1, padding=1, bias=True),
            nn.BatchNorm2d(ch_out),
            nn.ReLU(inplace=True),
        ]
        self.conv = nn.Sequential(*stages)

    def forward(self, x):
        """Apply both conv stages and return the result."""
        return self.conv(x)
|
| 47 |
+
|
| 48 |
+
class up_conv(nn.Module):
    """2x nearest-neighbour upsample followed by a 3x3 Conv-BN-ReLU.

    Doubles H and W and maps channels ``ch_in -> ch_out``.
    """

    def __init__(self, ch_in, ch_out):
        super(up_conv, self).__init__()
        stages = [
            nn.Upsample(scale_factor=2),
            nn.Conv2d(ch_in, ch_out, kernel_size=3, stride=1, padding=1, bias=True),
            nn.BatchNorm2d(ch_out),
            nn.ReLU(inplace=True),
        ]
        self.up = nn.Sequential(*stages)

    def forward(self, x):
        """Return the upsampled and convolved feature map."""
        return self.up(x)
|
| 61 |
+
|
| 62 |
+
class Recurrent_block(nn.Module):
    """Recurrent conv unit from R2U-Net: one shared Conv-BN-ReLU applied repeatedly.

    The first loop step convolves the raw input once and then ``x + x1``; every
    later step convolves ``x + x1`` again with the same weights, so ``t``
    iterations perform ``t + 1`` convolutions in total (as in the reference code).
    """

    def __init__(self, ch_out, t=2):
        super(Recurrent_block, self).__init__()
        self.t = t
        self.ch_out = ch_out
        self.conv = nn.Sequential(
            nn.Conv2d(ch_out, ch_out, kernel_size=3, stride=1, padding=1, bias=True),
            nn.BatchNorm2d(ch_out),
            nn.ReLU(inplace=True),
        )

    def forward(self, x):
        """Return the recurrent feature map; channels and spatial size are unchanged."""
        for step in range(self.t):
            if step == 0:
                out = self.conv(x)  # seed the recurrence with a plain conv
            out = self.conv(x + out)  # recurrent refinement with shared weights
        return out
|
| 81 |
+
|
| 82 |
+
class RRCNN_block(nn.Module):
    """Recurrent-residual block: 1x1 channel projection, two Recurrent_blocks,
    and a residual skip around the recurrent pair."""

    def __init__(self, ch_in, ch_out, t=2):
        super(RRCNN_block, self).__init__()
        self.RCNN = nn.Sequential(
            Recurrent_block(ch_out, t=t),
            Recurrent_block(ch_out, t=t),
        )
        self.Conv_1x1 = nn.Conv2d(ch_in, ch_out, kernel_size=1, stride=1, padding=0)

    def forward(self, x):
        """Project to ``ch_out`` channels, then add the recurrent refinement."""
        projected = self.Conv_1x1(x)
        return projected + self.RCNN(projected)
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
class single_conv(nn.Module):
    """A single 3x3 Conv-BN-ReLU stage; spatial size is preserved."""

    def __init__(self, ch_in, ch_out):
        super(single_conv, self).__init__()
        stages = [
            nn.Conv2d(ch_in, ch_out, kernel_size=3, stride=1, padding=1, bias=True),
            nn.BatchNorm2d(ch_out),
            nn.ReLU(inplace=True),
        ]
        self.conv = nn.Sequential(*stages)

    def forward(self, x):
        """Apply the conv stage and return the result."""
        return self.conv(x)
|
| 109 |
+
|
| 110 |
+
class Attention_block(nn.Module):
    """Additive attention gate (Attention U-Net).

    Combines a gating signal ``g`` (decoder feature) with a skip feature ``x``
    (encoder feature) to produce a single-channel mask in [0, 1] that
    re-weights ``x`` spatially.

    Args:
        F_g: channels of the gating signal.
        F_l: channels of the skip feature.
        F_int: intermediate channel count for the additive attention.
    """

    def __init__(self, F_g, F_l, F_int):
        super(Attention_block, self).__init__()
        self.W_g = nn.Sequential(
            nn.Conv2d(F_g, F_int, kernel_size=1, stride=1, padding=0, bias=True),
            nn.BatchNorm2d(F_int),
        )
        self.W_x = nn.Sequential(
            nn.Conv2d(F_l, F_int, kernel_size=1, stride=1, padding=0, bias=True),
            nn.BatchNorm2d(F_int),
        )
        self.psi = nn.Sequential(
            nn.Conv2d(F_int, 1, kernel_size=1, stride=1, padding=0, bias=True),
            nn.BatchNorm2d(1),
            nn.Sigmoid(),
        )
        self.relu = nn.ReLU(inplace=True)

    def forward(self, g, x):
        """Return ``x`` re-weighted by the attention mask (same shape as ``x``)."""
        combined = self.relu(self.W_g(g) + self.W_x(x))
        mask = self.psi(combined)  # one channel, broadcast over x's channels
        return x * mask
|
| 138 |
+
|
| 139 |
+
|
| 140 |
+
class U_Net(nn.Module):
    """Plain 5-level U-Net built from ``conv_block``/``up_conv``.

    Attribute names follow the reference implementation so that existing
    state-dict checkpoints keep loading.

    Args:
        img_ch: input image channels.
        output_ch: output (class) channels; raw logits are returned.
    """

    def __init__(self, img_ch=3, output_ch=1):
        super(U_Net, self).__init__()
        self.Maxpool = nn.MaxPool2d(kernel_size=2, stride=2)

        # Encoder: channels double at every level.
        self.Conv1 = conv_block(ch_in=img_ch, ch_out=64)
        self.Conv2 = conv_block(ch_in=64, ch_out=128)
        self.Conv3 = conv_block(ch_in=128, ch_out=256)
        self.Conv4 = conv_block(ch_in=256, ch_out=512)
        self.Conv5 = conv_block(ch_in=512, ch_out=1024)

        # Decoder: upsample, concatenate the encoder skip, fuse.
        self.Up5 = up_conv(ch_in=1024, ch_out=512)
        self.Up_conv5 = conv_block(ch_in=1024, ch_out=512)
        self.Up4 = up_conv(ch_in=512, ch_out=256)
        self.Up_conv4 = conv_block(ch_in=512, ch_out=256)
        self.Up3 = up_conv(ch_in=256, ch_out=128)
        self.Up_conv3 = conv_block(ch_in=256, ch_out=128)
        self.Up2 = up_conv(ch_in=128, ch_out=64)
        self.Up_conv2 = conv_block(ch_in=128, ch_out=64)

        self.Conv_1x1 = nn.Conv2d(64, output_ch, kernel_size=1, stride=1, padding=0)

    def forward(self, x):
        """Return raw logits with ``output_ch`` channels at input resolution."""
        # Encoder
        e1 = self.Conv1(x)
        e2 = self.Conv2(self.Maxpool(e1))
        e3 = self.Conv3(self.Maxpool(e2))
        e4 = self.Conv4(self.Maxpool(e3))
        e5 = self.Conv5(self.Maxpool(e4))

        # Decoder with skip connections (skip first in the concat, as upstream)
        d = self.Up_conv5(torch.cat((e4, self.Up5(e5)), dim=1))
        d = self.Up_conv4(torch.cat((e3, self.Up4(d)), dim=1))
        d = self.Up_conv3(torch.cat((e2, self.Up3(d)), dim=1))
        d = self.Up_conv2(torch.cat((e1, self.Up2(d)), dim=1))
        return self.Conv_1x1(d)
|
| 204 |
+
|
| 205 |
+
|
| 206 |
+
class R2U_Net(nn.Module):
    """Recurrent-Residual U-Net (R2U-Net): U-Net with ``RRCNN_block`` stages.

    Attribute names follow the reference implementation so checkpoints load.

    Args:
        img_ch: input image channels.
        output_ch: output (class) channels; raw logits are returned.
        t: recurrence steps inside each ``Recurrent_block``.
    """

    def __init__(self, img_ch=3, output_ch=1, t=2):
        super(R2U_Net, self).__init__()
        self.Maxpool = nn.MaxPool2d(kernel_size=2, stride=2)
        # NOTE(review): unused in forward (up_conv upsamples internally);
        # kept for parity with the reference implementation.
        self.Upsample = nn.Upsample(scale_factor=2)

        # Encoder
        self.RRCNN1 = RRCNN_block(ch_in=img_ch, ch_out=64, t=t)
        self.RRCNN2 = RRCNN_block(ch_in=64, ch_out=128, t=t)
        self.RRCNN3 = RRCNN_block(ch_in=128, ch_out=256, t=t)
        self.RRCNN4 = RRCNN_block(ch_in=256, ch_out=512, t=t)
        self.RRCNN5 = RRCNN_block(ch_in=512, ch_out=1024, t=t)

        # Decoder
        self.Up5 = up_conv(ch_in=1024, ch_out=512)
        self.Up_RRCNN5 = RRCNN_block(ch_in=1024, ch_out=512, t=t)
        self.Up4 = up_conv(ch_in=512, ch_out=256)
        self.Up_RRCNN4 = RRCNN_block(ch_in=512, ch_out=256, t=t)
        self.Up3 = up_conv(ch_in=256, ch_out=128)
        self.Up_RRCNN3 = RRCNN_block(ch_in=256, ch_out=128, t=t)
        self.Up2 = up_conv(ch_in=128, ch_out=64)
        self.Up_RRCNN2 = RRCNN_block(ch_in=128, ch_out=64, t=t)

        self.Conv_1x1 = nn.Conv2d(64, output_ch, kernel_size=1, stride=1, padding=0)

    def forward(self, x):
        """Return raw logits with ``output_ch`` channels at input resolution."""
        # Encoder
        e1 = self.RRCNN1(x)
        e2 = self.RRCNN2(self.Maxpool(e1))
        e3 = self.RRCNN3(self.Maxpool(e2))
        e4 = self.RRCNN4(self.Maxpool(e3))
        e5 = self.RRCNN5(self.Maxpool(e4))

        # Decoder with skip connections
        d = self.Up_RRCNN5(torch.cat((e4, self.Up5(e5)), dim=1))
        d = self.Up_RRCNN4(torch.cat((e3, self.Up4(d)), dim=1))
        d = self.Up_RRCNN3(torch.cat((e2, self.Up3(d)), dim=1))
        d = self.Up_RRCNN2(torch.cat((e1, self.Up2(d)), dim=1))
        return self.Conv_1x1(d)
|
| 275 |
+
|
| 276 |
+
|
| 277 |
+
|
| 278 |
+
class AttU_Net(nn.Module):
    """Attention U-Net: plain U-Net with an attention gate on every skip.

    Attribute names follow the reference implementation so checkpoints load.

    Args:
        img_ch: input image channels.
        output_ch: output (class) channels; raw logits are returned.
    """

    def __init__(self, img_ch=3, output_ch=1):
        super(AttU_Net, self).__init__()
        self.Maxpool = nn.MaxPool2d(kernel_size=2, stride=2)

        # Encoder
        self.Conv1 = conv_block(ch_in=img_ch, ch_out=64)
        self.Conv2 = conv_block(ch_in=64, ch_out=128)
        self.Conv3 = conv_block(ch_in=128, ch_out=256)
        self.Conv4 = conv_block(ch_in=256, ch_out=512)
        self.Conv5 = conv_block(ch_in=512, ch_out=1024)

        # Decoder: upsample, gate the skip with attention, concat, fuse.
        self.Up5 = up_conv(ch_in=1024, ch_out=512)
        self.Att5 = Attention_block(F_g=512, F_l=512, F_int=256)
        self.Up_conv5 = conv_block(ch_in=1024, ch_out=512)
        self.Up4 = up_conv(ch_in=512, ch_out=256)
        self.Att4 = Attention_block(F_g=256, F_l=256, F_int=128)
        self.Up_conv4 = conv_block(ch_in=512, ch_out=256)
        self.Up3 = up_conv(ch_in=256, ch_out=128)
        self.Att3 = Attention_block(F_g=128, F_l=128, F_int=64)
        self.Up_conv3 = conv_block(ch_in=256, ch_out=128)
        self.Up2 = up_conv(ch_in=128, ch_out=64)
        self.Att2 = Attention_block(F_g=64, F_l=64, F_int=32)
        self.Up_conv2 = conv_block(ch_in=128, ch_out=64)

        self.Conv_1x1 = nn.Conv2d(64, output_ch, kernel_size=1, stride=1, padding=0)

    def forward(self, x):
        """Return raw logits with ``output_ch`` channels at input resolution."""
        # Encoder
        e1 = self.Conv1(x)
        e2 = self.Conv2(self.Maxpool(e1))
        e3 = self.Conv3(self.Maxpool(e2))
        e4 = self.Conv4(self.Maxpool(e3))
        e5 = self.Conv5(self.Maxpool(e4))

        # Decoder: the upsampled decoder feature gates the encoder skip.
        d = self.Up5(e5)
        d = self.Up_conv5(torch.cat((self.Att5(g=d, x=e4), d), dim=1))
        d = self.Up4(d)
        d = self.Up_conv4(torch.cat((self.Att4(g=d, x=e3), d), dim=1))
        d = self.Up3(d)
        d = self.Up_conv3(torch.cat((self.Att3(g=d, x=e2), d), dim=1))
        d = self.Up2(d)
        d = self.Up_conv2(torch.cat((self.Att2(g=d, x=e1), d), dim=1))
        return self.Conv_1x1(d)
|
| 349 |
+
|
| 350 |
+
|
| 351 |
+
class R2AttU_Net(nn.Module):
    """R2U-Net with attention gates: RRCNN stages plus gated skip connections.

    Attribute names follow the reference implementation so checkpoints load.

    Args:
        img_ch: input image channels.
        output_ch: output (class) channels; raw logits are returned.
        t: recurrence steps inside each ``Recurrent_block``.
    """

    def __init__(self, img_ch=3, output_ch=1, t=2):
        super(R2AttU_Net, self).__init__()
        self.Maxpool = nn.MaxPool2d(kernel_size=2, stride=2)
        # NOTE(review): unused in forward (up_conv upsamples internally);
        # kept for parity with the reference implementation.
        self.Upsample = nn.Upsample(scale_factor=2)

        # Encoder
        self.RRCNN1 = RRCNN_block(ch_in=img_ch, ch_out=64, t=t)
        self.RRCNN2 = RRCNN_block(ch_in=64, ch_out=128, t=t)
        self.RRCNN3 = RRCNN_block(ch_in=128, ch_out=256, t=t)
        self.RRCNN4 = RRCNN_block(ch_in=256, ch_out=512, t=t)
        self.RRCNN5 = RRCNN_block(ch_in=512, ch_out=1024, t=t)

        # Decoder: upsample, gate the skip with attention, concat, fuse.
        self.Up5 = up_conv(ch_in=1024, ch_out=512)
        self.Att5 = Attention_block(F_g=512, F_l=512, F_int=256)
        self.Up_RRCNN5 = RRCNN_block(ch_in=1024, ch_out=512, t=t)
        self.Up4 = up_conv(ch_in=512, ch_out=256)
        self.Att4 = Attention_block(F_g=256, F_l=256, F_int=128)
        self.Up_RRCNN4 = RRCNN_block(ch_in=512, ch_out=256, t=t)
        self.Up3 = up_conv(ch_in=256, ch_out=128)
        self.Att3 = Attention_block(F_g=128, F_l=128, F_int=64)
        self.Up_RRCNN3 = RRCNN_block(ch_in=256, ch_out=128, t=t)
        self.Up2 = up_conv(ch_in=128, ch_out=64)
        self.Att2 = Attention_block(F_g=64, F_l=64, F_int=32)
        self.Up_RRCNN2 = RRCNN_block(ch_in=128, ch_out=64, t=t)

        self.Conv_1x1 = nn.Conv2d(64, output_ch, kernel_size=1, stride=1, padding=0)

    def forward(self, x):
        """Return raw logits with ``output_ch`` channels at input resolution."""
        # Encoder
        e1 = self.RRCNN1(x)
        e2 = self.RRCNN2(self.Maxpool(e1))
        e3 = self.RRCNN3(self.Maxpool(e2))
        e4 = self.RRCNN4(self.Maxpool(e3))
        e5 = self.RRCNN5(self.Maxpool(e4))

        # Decoder: the upsampled decoder feature gates the encoder skip.
        d = self.Up5(e5)
        d = self.Up_RRCNN5(torch.cat((self.Att5(g=d, x=e4), d), dim=1))
        d = self.Up4(d)
        d = self.Up_RRCNN4(torch.cat((self.Att4(g=d, x=e3), d), dim=1))
        d = self.Up3(d)
        d = self.Up_RRCNN3(torch.cat((self.Att3(g=d, x=e2), d), dim=1))
        d = self.Up2(d)
        d = self.Up_RRCNN2(torch.cat((self.Att2(g=d, x=e1), d), dim=1))
        return self.Conv_1x1(d)
|
models/multiresunet.py
ADDED
|
@@ -0,0 +1,190 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# https://github.com/j-sripad/mulitresunet-pytorch/blob/main/multiresunet.py
|
| 2 |
+
|
| 3 |
+
from typing import Tuple, Dict
|
| 4 |
+
import torch.nn as nn
|
| 5 |
+
import torch.nn.functional as F
|
| 6 |
+
import torch
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
class Multiresblock(nn.Module):
    """MultiRes block: three chained 3x3 convs whose outputs are concatenated
    and fused with a 1x1 shortcut (approximating 3x3/5x5/7x7 receptive fields).

    Args:
        input_features: input channel count.
        corresponding_unet_filters: U-Net filter count for the same stage.
        alpha: width factor (1.67 in the paper) used to derive branch widths.
    """

    def __init__(self, input_features: int, corresponding_unet_filters: int, alpha: float = 1.67) -> None:
        super().__init__()
        self.corresponding_unet_filters = corresponding_unet_filters
        self.alpha = alpha
        self.W = corresponding_unet_filters * alpha
        # Total channels produced by the three branches below.
        total = int(self.W * 0.167) + int(self.W * 0.333) + int(self.W * 0.5)
        self.conv2d_bn_1x1 = Conv2d_batchnorm(input_features=input_features, num_of_filters=total,
                                              kernel_size=(1, 1), activation='None', padding=0)
        self.conv2d_bn_3x3 = Conv2d_batchnorm(input_features=input_features, num_of_filters=int(self.W * 0.167),
                                              kernel_size=(3, 3), activation='relu', padding=1)
        self.conv2d_bn_5x5 = Conv2d_batchnorm(input_features=int(self.W * 0.167), num_of_filters=int(self.W * 0.333),
                                              kernel_size=(3, 3), activation='relu', padding=1)
        self.conv2d_bn_7x7 = Conv2d_batchnorm(input_features=int(self.W * 0.333), num_of_filters=int(self.W * 0.5),
                                              kernel_size=(3, 3), activation='relu', padding=1)
        self.batch_norm1 = nn.BatchNorm2d(int(self.W * 0.5) + int(self.W * 0.167) + int(self.W * 0.333), affine=False)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the fused multi-resolution feature map."""
        shortcut = self.conv2d_bn_1x1(x)
        branch3 = self.conv2d_bn_3x3(x)
        branch5 = self.conv2d_bn_5x5(branch3)
        branch7 = self.conv2d_bn_7x7(branch5)
        out = self.batch_norm1(torch.cat([branch3, branch5, branch7], axis=1))
        # NOTE(review): the same (non-affine) batch norm is deliberately applied
        # a second time after the shortcut add, matching the reference code.
        return self.batch_norm1(out + shortcut)
|
| 45 |
+
|
| 46 |
+
class Conv2d_batchnorm(nn.Module):
    """Conv2d followed by a non-affine BatchNorm2d, with optional ReLU.

    Args:
        input_features: input channel count.
        num_of_filters: output channel count.
        kernel_size: convolution kernel shape.
        stride: convolution stride.
        activation: ``'relu'`` applies F.relu after the norm; any other value
            means no activation.
        padding: convolution padding.
    """

    def __init__(self, input_features: int, num_of_filters: int, kernel_size: Tuple = (2, 2), stride: Tuple = (1, 1), activation: str = 'relu', padding: int = 0) -> None:
        super().__init__()
        self.activation = activation
        self.conv1 = nn.Conv2d(in_channels=input_features, out_channels=num_of_filters,
                               kernel_size=kernel_size, stride=stride, padding=padding)
        self.batchnorm = nn.BatchNorm2d(num_of_filters, affine=False)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Convolve, normalise, then (optionally) apply ReLU."""
        out = self.batchnorm(self.conv1(x))
        return F.relu(out) if self.activation == 'relu' else out
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
class Respath(nn.Module):
    """Residual path on a skip connection: a 3x3 conv with a 1x1 shortcut,
    optionally followed by extra shared-weight stages.

    Args:
        input_features: input channel count.
        filters: output channel count of every stage.
        respath_length: when > 1, that many additional stages run after the
            first; the "common" convs are re-used (shared weights) each time,
            matching the reference implementation.
    """

    def __init__(self, input_features: int, filters: int, respath_length: int) -> None:
        super().__init__()
        self.filters = filters
        self.respath_length = respath_length
        self.conv2d_bn_1x1 = Conv2d_batchnorm(input_features=input_features, num_of_filters=filters,
                                              kernel_size=(1, 1), activation='None', padding=0)
        self.conv2d_bn_3x3 = Conv2d_batchnorm(input_features=input_features, num_of_filters=filters,
                                              kernel_size=(3, 3), activation='relu', padding=1)
        self.conv2d_bn_1x1_common = Conv2d_batchnorm(input_features=filters, num_of_filters=filters,
                                                     kernel_size=(1, 1), activation='None', padding=0)
        self.conv2d_bn_3x3_common = Conv2d_batchnorm(input_features=filters, num_of_filters=filters,
                                                     kernel_size=(3, 3), activation='relu', padding=1)
        self.batch_norm1 = nn.BatchNorm2d(filters, affine=False)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the residual-path feature map with ``filters`` channels."""
        out = self.conv2d_bn_3x3(x) + self.conv2d_bn_1x1(x)
        out = self.batch_norm1(F.relu(out))
        if self.respath_length > 1:
            for _ in range(self.respath_length):
                out = self.conv2d_bn_3x3_common(out) + self.conv2d_bn_1x1_common(out)
                out = self.batch_norm1(F.relu(out))
        return out
|
| 110 |
+
|
| 111 |
+
class MultiResUnet(nn.Module):
    """MultiResUNet: U-Net variant built from Multiresblocks and Respaths.

    Attribute names follow the reference implementation so checkpoints load.

    Args:
        channels: input image channels.
        filters: base U-Net filter count (doubled at every encoder level).
        nclasses: output classes; for ``nclasses == 1`` a sigmoid is applied,
            otherwise raw logits are returned.
    """

    @staticmethod
    def _block_out_channels(unet_filters, alpha):
        # Channels a Multiresblock emits for a given U-Net filter count:
        # the sum of its three conv branches (0.5 / 0.167 / 0.333 of W).
        w = unet_filters * alpha
        return int(w * 0.5) + int(w * 0.167) + int(w * 0.333)

    def __init__(self, channels: int, filters: int = 32, nclasses: int = 1) -> None:
        super().__init__()
        self.alpha = 1.67
        self.filters = filters
        self.nclasses = nclasses

        # --- Encoder ---
        self.multiresblock1 = Multiresblock(input_features=channels, corresponding_unet_filters=filters)
        self.pool1 = nn.MaxPool2d(2, stride=2)
        self.in_filters1 = self._block_out_channels(filters, self.alpha)
        self.respath1 = Respath(input_features=self.in_filters1, filters=filters, respath_length=4)
        self.multiresblock2 = Multiresblock(input_features=self.in_filters1, corresponding_unet_filters=filters * 2)
        self.pool2 = nn.MaxPool2d(2, 2)
        self.in_filters2 = self._block_out_channels(filters * 2, self.alpha)
        self.respath2 = Respath(input_features=self.in_filters2, filters=filters * 2, respath_length=3)
        self.multiresblock3 = Multiresblock(input_features=self.in_filters2, corresponding_unet_filters=filters * 4)
        self.pool3 = nn.MaxPool2d(2, 2)
        self.in_filters3 = self._block_out_channels(filters * 4, self.alpha)
        self.respath3 = Respath(input_features=self.in_filters3, filters=filters * 4, respath_length=2)
        self.multiresblock4 = Multiresblock(input_features=self.in_filters3, corresponding_unet_filters=filters * 8)
        self.pool4 = nn.MaxPool2d(2, 2)
        self.in_filters4 = self._block_out_channels(filters * 8, self.alpha)
        self.respath4 = Respath(input_features=self.in_filters4, filters=filters * 8, respath_length=1)
        self.multiresblock5 = Multiresblock(input_features=self.in_filters4, corresponding_unet_filters=filters * 16)
        self.in_filters5 = self._block_out_channels(filters * 16, self.alpha)

        # --- Decoder ---
        self.upsample6 = nn.ConvTranspose2d(in_channels=self.in_filters5, out_channels=filters * 8,
                                            kernel_size=(2, 2), stride=(2, 2), padding=0)
        self.concat_filters1 = filters * 8 + filters * 8
        self.multiresblock6 = Multiresblock(input_features=self.concat_filters1, corresponding_unet_filters=filters * 8)
        self.in_filters6 = self._block_out_channels(filters * 8, self.alpha)
        self.upsample7 = nn.ConvTranspose2d(in_channels=self.in_filters6, out_channels=filters * 4,
                                            kernel_size=(2, 2), stride=(2, 2), padding=0)
        self.concat_filters2 = filters * 4 + filters * 4
        self.multiresblock7 = Multiresblock(input_features=self.concat_filters2, corresponding_unet_filters=filters * 4)
        self.in_filters7 = self._block_out_channels(filters * 4, self.alpha)
        self.upsample8 = nn.ConvTranspose2d(in_channels=self.in_filters7, out_channels=filters * 2,
                                            kernel_size=(2, 2), stride=(2, 2), padding=0)
        self.concat_filters3 = filters * 2 + filters * 2
        self.multiresblock8 = Multiresblock(input_features=self.concat_filters3, corresponding_unet_filters=filters * 2)
        self.in_filters8 = self._block_out_channels(filters * 2, self.alpha)
        self.upsample9 = nn.ConvTranspose2d(in_channels=self.in_filters8, out_channels=filters,
                                            kernel_size=(2, 2), stride=(2, 2), padding=0)
        self.concat_filters4 = filters + filters
        self.multiresblock9 = Multiresblock(input_features=self.concat_filters4, corresponding_unet_filters=filters)
        self.in_filters9 = self._block_out_channels(filters, self.alpha)
        self.conv_final = Conv2d_batchnorm(input_features=self.in_filters9, num_of_filters=self.nclasses,
                                           kernel_size=(1, 1), activation='None')

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Segment ``x``; sigmoid output for one class, raw logits otherwise."""
        # Encoder: multires block, then pool; skips go through Respaths.
        m1 = self.multiresblock1(x)
        skip1 = self.respath1(m1)
        m2 = self.multiresblock2(self.pool1(m1))
        skip2 = self.respath2(m2)
        m3 = self.multiresblock3(self.pool2(m2))
        skip3 = self.respath3(m3)
        m4 = self.multiresblock4(self.pool3(m3))
        skip4 = self.respath4(m4)
        m5 = self.multiresblock5(self.pool4(m4))

        # Decoder: transpose-conv upsample, concat the matching skip, fuse.
        d6 = self.multiresblock6(torch.cat([self.upsample6(m5), skip4], axis=1))
        d7 = self.multiresblock7(torch.cat([self.upsample7(d6), skip3], axis=1))
        d8 = self.multiresblock8(torch.cat([self.upsample8(d7), skip2], axis=1))
        d9 = self.multiresblock9(torch.cat([self.upsample9(d8), skip1], axis=1))

        logits = self.conv_final(d9)
        return logits if self.nclasses > 1 else torch.sigmoid(logits)
|
models/unet.py
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
from torch import nn
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
class DoubleConv(nn.Module):
    """Two 3x3 conv + ReLU stages, optionally with BatchNorm after each conv."""

    def __init__(self, in_channels, out_channels, with_bn=False):
        super().__init__()
        if with_bn:
            layers = [
                nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(),
                nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(),
            ]
        else:
            layers = [
                nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
                nn.ReLU(),
                nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
                nn.ReLU(),
            ]
        self.step = nn.Sequential(*layers)

    def forward(self, x):
        """Apply both conv stages; spatial size is preserved."""
        return self.step(x)
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
class UNet(nn.Module):
    """Plain 4-level U-Net with bilinear upsampling and a 1x1 output head.

    Returns raw logits (no sigmoid/softmax); callers apply their own activation.

    Args:
        in_channels: input image channels.
        out_channels: output (class) channels.
        with_bn: whether ``DoubleConv`` blocks include batch norm.
    """

    def __init__(self, in_channels, out_channels, with_bn=False):
        super().__init__()
        init_channels = 32
        self.out_channels = out_channels

        # Encoder: channel count doubles at each level.
        self.en_1 = DoubleConv(in_channels, init_channels, with_bn)
        self.en_2 = DoubleConv(1 * init_channels, 2 * init_channels, with_bn)
        self.en_3 = DoubleConv(2 * init_channels, 4 * init_channels, with_bn)
        self.en_4 = DoubleConv(4 * init_channels, 8 * init_channels, with_bn)

        # Decoder: each stage consumes upsampled features plus the skip.
        self.de_1 = DoubleConv((4 + 8) * init_channels, 4 * init_channels, with_bn)
        self.de_2 = DoubleConv((2 + 4) * init_channels, 2 * init_channels, with_bn)
        self.de_3 = DoubleConv((1 + 2) * init_channels, 1 * init_channels, with_bn)
        self.de_4 = nn.Conv2d(init_channels, out_channels, 1)

        self.maxpool = nn.MaxPool2d(kernel_size=2)
        self.upsample = nn.Upsample(scale_factor=2, mode='bilinear')

    def forward(self, x):
        """Return raw logits with ``out_channels`` channels at input resolution."""
        e1 = self.en_1(x)
        e2 = self.en_2(self.maxpool(e1))
        e3 = self.en_3(self.maxpool(e2))
        e4 = self.en_4(self.maxpool(e3))

        d = self.de_1(torch.cat([self.upsample(e4), e3], dim=1))
        d = self.de_2(torch.cat([self.upsample(d), e2], dim=1))
        d = self.de_3(torch.cat([self.upsample(d), e1], dim=1))
        return self.de_4(d)
|
models/unetpp.py
ADDED
|
@@ -0,0 +1,141 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# https://github.com/4uiiurz1/pytorch-nested-unet/blob/master/archs.py (unetpp)
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
import torch
|
| 5 |
+
from torch import nn
|
| 6 |
+
from torch.nn.functional import softmax, sigmoid
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
__all__ = ['UNet', 'NestedUNet']
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
class VGGBlock(nn.Module):
    """Conv-BN-ReLU x2 block used at every node of (Nested) U-Net."""

    def __init__(self, in_channels, middle_channels, out_channels):
        super().__init__()
        self.relu = nn.ReLU(inplace=True)
        self.conv1 = nn.Conv2d(in_channels, middle_channels, 3, padding=1)
        self.bn1 = nn.BatchNorm2d(middle_channels)
        self.conv2 = nn.Conv2d(middle_channels, out_channels, 3, padding=1)
        self.bn2 = nn.BatchNorm2d(out_channels)

    def forward(self, x):
        """Apply the two conv stages; spatial size is preserved."""
        hidden = self.relu(self.bn1(self.conv1(x)))
        return self.relu(self.bn2(self.conv2(hidden)))
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
class UNet(nn.Module):
    """Baseline U-Net (from the nested-unet reference repo) built from VGGBlocks.

    Attribute names follow the reference implementation so checkpoints load.

    Args:
        num_classes: output (class) channels; raw logits are returned.
        input_channels: input image channels.
    """

    def __init__(self, num_classes, input_channels=3, **kwargs):
        super().__init__()

        nb_filter = [32, 64, 128, 256, 512]

        self.pool = nn.MaxPool2d(2, 2)
        self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)

        # Encoder column (x{i}_0 in U-Net++ notation).
        self.conv0_0 = VGGBlock(input_channels, nb_filter[0], nb_filter[0])
        self.conv1_0 = VGGBlock(nb_filter[0], nb_filter[1], nb_filter[1])
        self.conv2_0 = VGGBlock(nb_filter[1], nb_filter[2], nb_filter[2])
        self.conv3_0 = VGGBlock(nb_filter[2], nb_filter[3], nb_filter[3])
        self.conv4_0 = VGGBlock(nb_filter[3], nb_filter[4], nb_filter[4])

        # Decoder path: each node fuses the skip with the upsampled feature.
        self.conv3_1 = VGGBlock(nb_filter[3] + nb_filter[4], nb_filter[3], nb_filter[3])
        self.conv2_2 = VGGBlock(nb_filter[2] + nb_filter[3], nb_filter[2], nb_filter[2])
        self.conv1_3 = VGGBlock(nb_filter[1] + nb_filter[2], nb_filter[1], nb_filter[1])
        self.conv0_4 = VGGBlock(nb_filter[0] + nb_filter[1], nb_filter[0], nb_filter[0])

        self.final = nn.Conv2d(nb_filter[0], num_classes, kernel_size=1)

    def forward(self, input):
        """Return raw logits with ``num_classes`` channels at input resolution."""
        # Encoder
        x0_0 = self.conv0_0(input)
        x1_0 = self.conv1_0(self.pool(x0_0))
        x2_0 = self.conv2_0(self.pool(x1_0))
        x3_0 = self.conv3_0(self.pool(x2_0))
        x4_0 = self.conv4_0(self.pool(x3_0))

        # Decoder along the outermost path
        up = self.up
        x3_1 = self.conv3_1(torch.cat([x3_0, up(x4_0)], 1))
        x2_2 = self.conv2_2(torch.cat([x2_0, up(x3_1)], 1))
        x1_3 = self.conv1_3(torch.cat([x1_0, up(x2_2)], 1))
        x0_4 = self.conv0_4(torch.cat([x0_0, up(x1_3)], 1))

        return self.final(x0_4)
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
class NestedUNet(nn.Module):
    """UNet++ (nested U-Net) with dense skip pathways.

    Node (i, j) lives at encoder depth i and skip-pathway column j.  Each
    intermediate node fuses every same-depth predecessor (j' < j) with the
    upsampled node from one level below, densely re-using features along
    each skip connection.

    With ``deep_supervision=True`` the forward pass returns a list of four
    logit maps (one per nested decoder head); otherwise it returns the
    single full-depth output.  Input H and W should be divisible by 16.
    """

    def __init__(self, num_classes, input_channels=3, deep_supervision=False, **kwargs):
        super().__init__()

        nb_filter = [32, 64, 128, 256, 512]

        self.deep_supervision = deep_supervision

        self.pool = nn.MaxPool2d(2, 2)
        self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)

        # Backbone encoder column (j = 0). Attribute names are kept identical
        # to the original so saved state_dicts still load.
        self.conv0_0 = VGGBlock(input_channels, nb_filter[0], nb_filter[0])
        self.conv1_0 = VGGBlock(nb_filter[0], nb_filter[1], nb_filter[1])
        self.conv2_0 = VGGBlock(nb_filter[1], nb_filter[2], nb_filter[2])
        self.conv3_0 = VGGBlock(nb_filter[2], nb_filter[3], nb_filter[3])
        self.conv4_0 = VGGBlock(nb_filter[3], nb_filter[4], nb_filter[4])

        # Nested columns: node (i, j) takes j copies of nb_filter[i] channels
        # (the dense same-depth skips) plus nb_filter[i + 1] upsampled channels.
        self.conv0_1 = VGGBlock(nb_filter[0] + nb_filter[1], nb_filter[0], nb_filter[0])
        self.conv1_1 = VGGBlock(nb_filter[1] + nb_filter[2], nb_filter[1], nb_filter[1])
        self.conv2_1 = VGGBlock(nb_filter[2] + nb_filter[3], nb_filter[2], nb_filter[2])
        self.conv3_1 = VGGBlock(nb_filter[3] + nb_filter[4], nb_filter[3], nb_filter[3])

        self.conv0_2 = VGGBlock(nb_filter[0] * 2 + nb_filter[1], nb_filter[0], nb_filter[0])
        self.conv1_2 = VGGBlock(nb_filter[1] * 2 + nb_filter[2], nb_filter[1], nb_filter[1])
        self.conv2_2 = VGGBlock(nb_filter[2] * 2 + nb_filter[3], nb_filter[2], nb_filter[2])

        self.conv0_3 = VGGBlock(nb_filter[0] * 3 + nb_filter[1], nb_filter[0], nb_filter[0])
        self.conv1_3 = VGGBlock(nb_filter[1] * 3 + nb_filter[2], nb_filter[1], nb_filter[1])

        self.conv0_4 = VGGBlock(nb_filter[0] * 4 + nb_filter[1], nb_filter[0], nb_filter[0])

        # One 1x1 head per supervised column, or a single head at full depth.
        if self.deep_supervision:
            self.final1 = nn.Conv2d(nb_filter[0], num_classes, kernel_size=1)
            self.final2 = nn.Conv2d(nb_filter[0], num_classes, kernel_size=1)
            self.final3 = nn.Conv2d(nb_filter[0], num_classes, kernel_size=1)
            self.final4 = nn.Conv2d(nb_filter[0], num_classes, kernel_size=1)
        else:
            self.final = nn.Conv2d(nb_filter[0], num_classes, kernel_size=1)

    def forward(self, input):
        """Return logits (B, num_classes, H, W), or a list of four such maps
        when deep supervision is enabled."""

        def fuse(block, skips, below):
            # Concatenate same-depth skip features with the upsampled feature
            # from one level deeper, then convolve.
            return block(torch.cat(skips + [self.up(below)], dim=1))

        # x[i, j] is the feature map of grid node (depth i, column j).
        x = {(0, 0): self.conv0_0(input)}
        x[1, 0] = self.conv1_0(self.pool(x[0, 0]))
        x[0, 1] = fuse(self.conv0_1, [x[0, 0]], x[1, 0])

        x[2, 0] = self.conv2_0(self.pool(x[1, 0]))
        x[1, 1] = fuse(self.conv1_1, [x[1, 0]], x[2, 0])
        x[0, 2] = fuse(self.conv0_2, [x[0, 0], x[0, 1]], x[1, 1])

        x[3, 0] = self.conv3_0(self.pool(x[2, 0]))
        x[2, 1] = fuse(self.conv2_1, [x[2, 0]], x[3, 0])
        x[1, 2] = fuse(self.conv1_2, [x[1, 0], x[1, 1]], x[2, 1])
        x[0, 3] = fuse(self.conv0_3, [x[0, 0], x[0, 1], x[0, 2]], x[1, 2])

        x[4, 0] = self.conv4_0(self.pool(x[3, 0]))
        x[3, 1] = fuse(self.conv3_1, [x[3, 0]], x[4, 0])
        x[2, 2] = fuse(self.conv2_2, [x[2, 0], x[2, 1]], x[3, 1])
        x[1, 3] = fuse(self.conv1_3, [x[1, 0], x[1, 1], x[1, 2]], x[2, 2])
        x[0, 4] = fuse(self.conv0_4, [x[0, 0], x[0, 1], x[0, 2], x[0, 3]], x[1, 3])

        if self.deep_supervision:
            return [
                self.final1(x[0, 1]),
                self.final2(x[0, 2]),
                self.final3(x[0, 3]),
                self.final4(x[0, 4]),
            ]
        return self.final(x[0, 4])
|
requirements.txt
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
torch
|
| 2 |
+
torchvision
|
| 3 |
+
gradio
|
| 4 |
+
numpy
|
| 5 |
+
Pillow
|
| 6 |
+
pyyaml
|
saved_models/isic2018_unet/best_model_state_dict.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8559876e4b85070ffbe84748e02926a9e8690b09bf101a12f7e5c5e590decbf0
|
| 3 |
+
size 7799041
|
saved_models/segpc2021_unet/best_model_state_dict.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a182b4f7a415d056ae7e5293aed483804494580eb3f1a3b27d04e77c55468e76
|
| 3 |
+
size 7800193
|