jangwon-kim-cocel commited on
Commit
0b63116
·
verified ·
1 Parent(s): b847249

Upload 6 files

Browse files
Files changed (7) hide show
  1. .gitattributes +1 -0
  2. README.md +71 -0
  3. gif_for_readme.gif +3 -0
  4. main.py +75 -0
  5. model.py +113 -0
  6. trainer.py +82 -0
  7. utils.py +124 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ gif_for_readme.gif filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <div align="center">
2
+ <h1>Radon Averaging</h1>
3
+ <h3>A Practical Approach for Designing Rotation-Invariant Models</h3>
4
+
5
+ <a href="https://www.python.org/">
6
+ <img src="https://img.shields.io/badge/Python-3.10+-blue?logo=python&style=flat-square" alt="Python Badge"/>
7
+ </a>
8
+ &nbsp;&nbsp;
9
+ <a href="https://pytorch.org/">
10
+ <img src="https://img.shields.io/badge/PyTorch-2.0+-EE4C2C?logo=pytorch&style=flat-square" alt="PyTorch Badge"/>
11
+ </a>
12
+ &nbsp;&nbsp;
13
+ <a href="https://doi.org/10.1016/j.engappai.2025.113299">
14
+ <img src="https://img.shields.io/badge/EAAI%202026-Published-success?style=flat-square" alt="EAAI Badge"/>
15
+ </a>
16
+ &nbsp;&nbsp;
17
+ <a href="https://www.elsevier.com/">
18
+ <img src="https://img.shields.io/badge/Elsevier-Journal-orange?style=flat-square" alt="Elsevier Badge"/>
19
+ </a>
20
+ <br/><br/>
21
+
22
+ <!-- Radon Transform Animation -->
23
+ <img src="./gif_for_readme.gif" width="700px"/>
24
+ </div>
25
+
26
+ ---
27
+
28
+ ## Engineering Applications of Artificial Intelligence (EAAI 2026)
29
+ ### PyTorch Implementation
30
+
31
+ This repository contains a PyTorch implementation of **Radon Averaging (RA)** from the paper:
32
+
33
+ > **Radon Averaging: A practical approach for designing rotation-invariant models**
34
+ > Jangwon Kim, Sanghyun Ryoo, Jiwon Kim, Junkee Hong, Soohee Han
35
+ > *Engineering Applications of Artificial Intelligence*, Volume 164, 2026
36
+
37
+ ## 📄 Paper Link
38
+ > **DOI:** https://doi.org/10.1016/j.engappai.2025.113299
39
+ > **Journal:** Engineering Applications of Artificial Intelligence
40
+
41
+ ---
42
+
43
+ ## Radon Averaging
44
+
45
+ Radon Averaging achieves rotation invariance by:
46
+ 1. **Radon Transform** (ℛ): Converts images ($I$) to sinograms, where an rotation corresponds ($$g$$) to a circular shift.
47
+ 2. **Averaging over Discrete Rotations** ($$G$$): Eliminates boundary artifacts via group averaging
48
+ 3. **Standard CNN Backbone** ($$Φ$$): No architectural changes required
49
+ ```math
50
+ RA_G^Φ(I) = \frac{1}{|G|} \sum_{g \in G} (Φ \circ π(g) \circ ℛ)(I)
51
+ ```
52
+ ---
53
+
54
+ ## Advantages
55
+ - **Plug-and-play**: works with standard (pretrained) CNN backbones (no architectural changes).
56
+ - **Rotation invariance in practice**: stable representations under image rotations.
57
+ - **Reduces boundary artifacts**: group averaging mitigates Radon transform edge effects.
58
+ ---
59
+
60
+ ## Citation Example
61
+ ```
62
+ @article{kim2026radonaveraging,
63
+ title = {Radon Averaging: A practical approach for designing rotation-invariant models},
64
+ author = {Kim, Jangwon and Ryoo, Sanghyun and Kim, Jiwon and Hong, Junkee and Han, Soohee},
65
+ journal = {Engineering Applications of Artificial Intelligence},
66
+ volume = {164},
67
+ pages = {113299},
68
+ year = {2026},
69
+ doi = {10.1016/j.engappai.2025.113299}
70
+ }
71
+ ```
gif_for_readme.gif ADDED

Git LFS Details

  • SHA256: 9282f945446d00ba50d01c0d287a4d7b1bbf398bdd468ddbad17094ad5907c93
  • Pointer size: 132 Bytes
  • Size of remote file: 4.55 MB
main.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import torch
3
+ from model import RA
4
+ from trainer import train
5
+ import random
6
+ import warnings
7
+ warnings.filterwarnings('ignore')
8
+
9
+ from utils import *
10
+
11
+ def parse_args():
12
+ parser = argparse.ArgumentParser(description="Train & Test with configurable args")
13
+ parser.add_argument("--num_train_data", type=int, default=10000, help="number of training samples")
14
+ parser.add_argument("--batch_size", type=int, default=64, help="batch size")
15
+ parser.add_argument("--epochs", type=int, default=50, help="number of epochs")
16
+ parser.add_argument("--group", type=str, default="C8", help="group name for RA model (C4 or C8)")
17
+ parser.add_argument("--n_seeds", type=int, default=10, help="number of seeds to run")
18
+
19
+ return parser.parse_args()
20
+
21
+
22
def train_and_test(model, train_loader, val_loader, test_loader, epochs, device):
    """Run the full train/validate/test cycle and return (accuracy, f1).

    Thin wrapper around `train` that fixes the optimizer hyperparameters
    (lr=1e-4, weight decay=1e-4) used throughout the experiments.
    """
    return train(
        model=model,
        epochs=epochs,
        train_loader=train_loader,
        val_loader=val_loader,
        test_loader=test_loader,
        lr=1e-4,
        wd=1e-4,
        device=device,
    )
34
+
35
+
36
def main(args):
    """Run the RA experiment over `args.n_seeds` consecutive seeds and print
    the mean/std of test accuracy and weighted F1 across seeds."""
    device = "cuda" if torch.cuda.is_available() else "cpu"
    base_seed = random.randint(1, 999)

    accs = []
    f1s = []

    print_run_config(args, device, base_seed)

    for seed in range(base_seed, base_seed + args.n_seeds):
        # Fresh dataset splits and a fresh model for every seed.
        def make_loader(dataset):
            return torch.utils.data.DataLoader(dataset, batch_size=args.batch_size)

        train_loader = make_loader(MnistDataset(num_train_data=args.num_train_data, mode="train", seed=seed))
        val_loader = make_loader(MnistDataset(mode="validation", seed=seed))
        test_loader = make_loader(MnistDataset(mode="test", seed=seed))

        model = RA(group=args.group).to(device)
        score, f1 = train_and_test(
            model=model,
            train_loader=train_loader,
            val_loader=val_loader,
            test_loader=test_loader,
            epochs=args.epochs,
            device=device,
        )
        accs.append(score)
        f1s.append(f1)

    a_m, a_std = cal_mean_std(accs)
    print(f"[Acc] Mean: {a_m} | Std: {a_std}\n")

    f_m, f_std = cal_mean_std(f1s)
    print(f"[F-Score] Mean: {f_m} | Std: {f_std}\n")
71
+
72
+
73
if __name__ == "__main__":
    # Entry point: parse CLI args and launch the multi-seed experiment.
    main(parse_args())
model.py ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import division
2
+ import copy
3
+ import torch
4
+ import torchvision.transforms.functional as TF
5
+ from torchvision import models
6
+ from torchvision.transforms import Pad
7
+ from torchvision.transforms import Resize
8
+ from torchvision.transforms import ToTensor
9
+ from skimage.transform import radon
10
+ from PIL import Image
11
+ import numpy as np
12
+
13
+
14
# Sinogram width (number of detector bins) and, equally, the number of
# projection angles fed to skimage's `radon`.
WIDTH = 29
# Projection angles in degrees: WIDTH samples evenly spaced over [0, 360).
# Full-circle coverage is what lets an image rotation act as a circular
# shift of the sinogram columns (see README).
THETA = np.linspace(0.0, 360.0, num=WIDTH, endpoint=False)
16
+
17
+
18
class RA(torch.nn.Module):
    """Radon Averaging (RA) classifier.

    Converts each input image into a sinogram via the Radon transform and
    averages the backbone logits over the rotations of a cyclic group
    ('C1', 'C4' or 'C8'), making the prediction rotation-invariant in
    practice without changing the backbone architecture.
    """

    # Rotation angles (degrees) for each supported cyclic group.
    GROUPS = {
        'C1': (0,),
        'C4': (0, 90, 180, 270),
        'C8': (0, 45, 90, 135, 180, 225, 270, 315),
    }

    def __init__(self, group='C4'):
        super(RA, self).__init__()
        self.backbone = MnistCNN()
        self.group = group
        self.resize1 = Resize(87)  # to upsample before rotating (limits interpolation loss)
        self.resize2 = Resize(29)  # to downsample back to the backbone's 29x29 input
        self.totensor = ToTensor()

    def forward(self, x):
        """Return class logits averaged over the group's rotations.

        Args:
            x: float tensor of shape (batch, 1, 29, 29). The Radon transform
               runs on CPU via skimage, so inputs are staged through CPU.

        Raises:
            NameError: if `self.group` is not one of 'C1', 'C4', 'C8'.
        """
        if self.group not in self.GROUPS:
            raise NameError(f"unsupported group: {self.group!r}")
        angles = self.GROUPS[self.group]

        # Fix: run the backbone on whatever device the model actually lives
        # on, instead of the previously hard-coded 'cuda' (which crashed on
        # CPU-only machines).
        device = next(self.backbone.parameters()).device

        x = x.to('cpu')
        # Keep a pristine copy of the inputs; sinograms are written into x.
        # (clone() is sufficient for a tensor; deepcopy was wasteful.)
        source = x.clone()

        if self.group == 'C1':
            # Trivial group: one sinogram per image, no rotation averaging.
            for i in range(x.shape[0]):
                sinogram = radon(source[i][0].numpy(), theta=THETA)
                x[i] = torch.FloatTensor(sinogram).reshape(1, 29, 29)
            return self.backbone(x.to(device))

        logits_sum = 0
        for angle in angles:
            for i in range(x.shape[0]):
                # Upsample -> rotate -> downsample, then take the sinogram.
                img = Image.fromarray(source[i][0].numpy(), mode='F')
                rotated = self.resize2(self.resize1(img).rotate(angle, Image.BILINEAR))
                sinogram = radon(self.totensor(rotated).numpy()[0], theta=THETA)
                x[i] = torch.FloatTensor(sinogram).reshape(1, 29, 29)
            logits_sum = logits_sum + self.backbone(x.to(device))

        # Group average of the logits.
        return logits_sum / len(angles)
68
+
69
+
70
class MnistCNN(torch.nn.Module):
    """Small CNN for 29x29 single-channel inputs producing 10 logits.

    Three conv+ReLU+maxpool stages, a dropout-regularized hidden linear
    layer, and a final linear classifier. Weights are orthogonally
    initialized; biases start at zero.
    """

    def __init__(self):
        super(MnistCNN, self).__init__()
        self.keep_prob = 0.5  # dropout keeps activations with this probability

        self.layer1 = torch.nn.Sequential(
            torch.nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2))

        self.layer2 = torch.nn.Sequential(
            torch.nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2))

        self.layer3 = torch.nn.Sequential(
            torch.nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2, padding=1))

        # 29 -> 14 -> 7 -> 4 spatially, so the flattened size is 128*4*4 = 2048.
        self.fc1 = torch.nn.Linear(2048, 128, bias=True)
        self.layer4 = torch.nn.Sequential(
            self.fc1,
            torch.nn.ReLU(),
            torch.nn.Dropout(p=1 - self.keep_prob))

        self.fc2 = torch.nn.Linear(128, 10, bias=True)
        self._initialize_weights()

    def _initialize_weights(self):
        """Orthogonal weights and zero biases for every conv/linear module."""
        for module in self.modules():
            if isinstance(module, (torch.nn.Conv2d, torch.nn.Linear)):
                torch.nn.init.orthogonal_(module.weight)
                if module.bias is not None:
                    torch.nn.init.constant_(module.bias, 0)

    def forward(self, x):
        """Map a (batch, 1, 29, 29) tensor to (batch, 10) logits."""
        out = x
        for stage in (self.layer1, self.layer2, self.layer3):
            out = stage(out)
        out = out.flatten(1)  # same as view(batch, -1) for contiguous input
        return self.fc2(self.layer4(out))
trainer.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import copy
2
+ import os
3
+ import torch
4
+ import numpy as np
5
+ from tqdm.auto import tqdm
6
+ from PIL import Image
7
+ import matplotlib.pyplot as plt
8
+ from torch.utils.data import Dataset
9
+ from torchvision.transforms import Pad
10
+ from torchvision.transforms import Resize
11
+ from torchvision.transforms import ToTensor
12
+ from skimage.transform import radon, rescale
13
+ from scipy.ndimage import rotate
14
+ import random
15
+ from sklearn.metrics import f1_score
16
+
17
+
18
def test(model, test_loader, device, final_test=False):
    """Evaluate `model` on `test_loader`.

    Returns:
        (accuracy_percent, weighted_f1) over the full loader.
        Prints a summary line when `final_test` is True.
    """
    model.eval()
    n_seen = 0
    n_correct = 0
    preds = []
    targets = []

    with torch.no_grad():
        for x, t in test_loader:
            x = x.to(device)
            t = t.to(device)
            logits = model(x).view(-1, 10)

            # torch.max over dim 1 yields (values, indices); we want indices.
            prediction = logits.data.max(1)[1]
            n_seen += t.shape[0]
            n_correct += (prediction == t).sum().item()

            # Accumulate for the F1 computation over the whole set.
            preds.extend(prediction.cpu().numpy())
            targets.extend(t.cpu().numpy())

    f1 = f1_score(targets, preds, average='weighted')
    accuracy = n_correct / n_seen * 100.0
    if final_test:
        print(f"[Final Test] Acc: {accuracy} | F1-Score: {f1}\n")

    return accuracy, f1
47
+
48
+
49
def train(model, epochs, train_loader, val_loader, test_loader, lr=1e-4, wd=1e-4, device='cuda'):
    """Train `model`, keep the best validation checkpoint, and evaluate it.

    Validates after every epoch; the model with the highest validation
    accuracy is deep-copied (on CPU) and finally evaluated on `test_loader`.

    Returns:
        (test_accuracy_percent, weighted_f1) of the best-validation model.
    """
    best_acc = 0.0
    best_model = None
    loss_function = torch.nn.CrossEntropyLoss()
    # Only the CNN backbone has trainable parameters; the Radon transform
    # part of RA is parameter-free.
    optimizer = torch.optim.Adam(model.backbone.parameters(), lr=lr, weight_decay=wd)

    for epoch in range(epochs):
        model.train()
        for x, t in train_loader:
            optimizer.zero_grad()
            x = x.to(device)
            t = t.to(device)
            y = model(x).view(-1, 10)
            loss = loss_function(y, t)
            loss.backward()
            optimizer.step()
            del x, y, t, loss

        # Validate every epoch (the original `(epoch + 1) % 1 == 0` guard was
        # always true and has been removed).
        accuracy, _ = test(model, val_loader, device=device)
        print(f"epoch {epoch + 1} | validation accuracy: {accuracy}")
        if accuracy > best_acc:
            best_acc = accuracy
            # Checkpoint on CPU so the copy does not hold GPU memory.
            best_model = copy.deepcopy(model.to('cpu'))
            model = model.to(device)  # fix: was hard-coded 'cuda'

    print(f"Max validation accuracy: {best_acc}\n")
    if best_model is None:
        # Robustness fix: no epoch improved on 0.0 (e.g. epochs == 0);
        # fall back to the current weights instead of crashing on None.
        best_model = model
    best_model = best_model.to(device)  # fix: was hard-coded 'cuda'
    score, f1 = test(best_model, test_loader, device=device, final_test=True)
    del best_model
    del model
    return score, f1
utils.py ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ import shutil
3
+ import torch
4
+ import numpy as np
5
+ import tempfile
6
+ from PIL import Image
7
+ from torch.utils.data import Dataset
8
+ from torchvision.transforms import Pad, Resize, ToTensor
9
+ from skimage.transform import radon, rescale
10
+ from scipy.ndimage import rotate
11
+
12
+
13
# Google Drive share links for the MNIST ".amat" data files, fetched on demand
# by `_ensure_mnist_amat_files`. These are viewer-style URLs; `_gdrive_download`
# passes fuzzy=True so gdown can extract the file id from them.
MNIST_TRAIN_GDRIVE = "https://drive.google.com/file/d/15G2FsYGRSpEkr5MTVofSFhKaMMIeiFhk/view?usp=drive_link"
MNIST_TEST_GDRIVE = "https://drive.google.com/file/d/1PK1DeFpw2OomuHDoA8ZtTPWLkPHOT6u6/view?usp=drive_link"
15
+
16
def _gdrive_download(url_or_id: str, out_path: Path) -> None:
    """Download a Google Drive file to `out_path` via gdown.

    Downloads into a temp file in the destination directory first, then
    moves it into place, so a partial download never ends up at `out_path`.

    Raises:
        RuntimeError: if gdown reports failure or the file is empty.
    """
    import gdown  # local import: only needed when a download is actually required

    out_path.parent.mkdir(parents=True, exist_ok=True)

    # Reserve a temp file next to the destination so the final move is cheap.
    with tempfile.NamedTemporaryFile(delete=False, dir=str(out_path.parent), suffix=".tmp") as handle:
        tmp_path = Path(handle.name)

    try:
        result = gdown.download(url_or_id, str(tmp_path), quiet=False, fuzzy=True)
        if not result or not tmp_path.exists() or tmp_path.stat().st_size == 0:
            raise RuntimeError(f"[Google Drive] Failed to download: {url_or_id}")

        shutil.move(str(tmp_path), str(out_path))
    finally:
        # On the success path the move already removed the temp file; this
        # cleans up after any failure, best-effort.
        if tmp_path.exists():
            try:
                tmp_path.unlink()
            except Exception:
                pass
36
+
37
+
38
def _ensure_mnist_amat_files(mnist_dir: str = "mnist") -> None:
    """Ensure both MNIST .amat files exist locally, downloading any missing one."""
    base = Path(mnist_dir)
    wanted = [
        (base / "mnist_train.amat", MNIST_TRAIN_GDRIVE, "train"),
        (base / "mnist_test.amat", MNIST_TEST_GDRIVE, "test"),
    ]

    for path, url, split in wanted:
        if path.is_file():
            continue
        print(f"[MNIST] '{path}' not found. Downloading MNIST {split} file from Google Drive...")
        _gdrive_download(url, path)
        print(f"[MNIST] Download complete: {path}")
52
+
53
+
54
class MnistDataset(Dataset):
    """MNIST dataset producing (1, 29, 29) float images and int64 labels.

    mode='train'      : first `num_train_data` samples of a shuffled split of
                        the train file, zero-padded from 28x28 to 29x29,
                        kept upright (no rotation).
    mode='validation' : the remaining 12000 samples of that split, each
                        rotated by a uniformly random angle in [0, 360).
    mode='test'       : the full test file, each image randomly rotated the
                        same way.

    Missing .amat files are downloaded from Google Drive on first use.
    """

    def __init__(self, mode, num_train_data=10000, seed=1):
        # NOTE(review): `assert` is stripped under `python -O`; these are
        # developer sanity checks, not user-input validation.
        assert num_train_data + 12000 <= 45000
        assert mode in ['train', 'validation', 'test']
        if seed is not None:
            # Seed NumPy (split shuffle + rotation angles) and torch so each
            # seed produces a reproducible dataset.
            np.random.seed(seed)
            torch.manual_seed(seed)

        # Download the .amat files if they are not present locally.
        _ensure_mnist_amat_files("mnist")

        if mode == "test":
            file = "mnist/mnist_test.amat"
        else:
            file = "mnist/mnist_train.amat"

        # Each .amat row is 784 pixel values followed by the label.
        data = np.loadtxt(file)
        images = data[:, :-1].reshape(-1, 28, 28).astype(np.float32)

        # Shuffle the images
        # (train and validation take disjoint ranges of one permutation;
        # for mode='test' the permutation is computed but not used).
        indices = np.arange(num_train_data + 12000)
        np.random.shuffle(indices)
        if mode == 'train':
            images = images[indices[:num_train_data]]
            data = data[indices[:num_train_data]]
        elif mode == 'validation':
            images = images[indices[num_train_data:]]
            data = data[indices[num_train_data:]]

        if mode == 'test' or mode == 'validation':
            # Evaluation images are randomly rotated: pad 28->29, upsample to
            # 87, rotate by a random angle, downsample back to 29x29.
            pad = Pad((0, 0, 1, 1), fill=0)
            resize1 = Resize(87)  # to upsample
            resize2 = Resize(29)  # to downsample
            totensor = ToTensor()

            self.images = torch.empty((images.shape[0], 1, 29, 29))
            for i in range(images.shape[0]):
                img = images[i]
                img = Image.fromarray(img, mode='F')  # 32-bit float PIL image
                r = (np.random.rand() * 360.)
                self.images[i] = totensor(resize2(resize1(pad(img)).rotate(r, Image.BILINEAR))).reshape(1, 29, 29)
        else:
            # Training images stay upright; just zero-pad 28x28 -> 29x29.
            self.images = torch.zeros((images.shape[0], 1, 29, 29))
            self.images[:, :, :28, :28] = torch.tensor(images).reshape(-1, 1, 28, 28)

        self.labels = data[:, -1].astype(np.int64)

    def __getitem__(self, index):
        """Return (image, label): a (1, 29, 29) float tensor and an int64 label."""
        image, label = self.images[index], self.labels[index]

        return image, label

    def __len__(self):
        """Number of samples in this split."""
        return len(self.labels)
107
+
108
+
109
def cal_mean_std(ary):
    """Return (mean, population std) of a sequence as numpy scalars."""
    values = np.asarray(ary)
    return values.mean(), values.std()
112
+
113
def print_run_config(args, device, base_seed):
    """Print the run-configuration banner shown before training starts."""
    rows = [
        ("Device", device),
        ("Group", args.group),
        ("Epochs", args.epochs),
        ("Batch size", args.batch_size),
        ("Train samples", args.num_train_data),
        ("Num seeds", args.n_seeds),
        ("Seed range", f"{base_seed} ~ {base_seed + args.n_seeds - 1}"),
    ]

    print("\n" + "=" * 34)
    print("RA Training Configuration")
    print("-" * 34)
    for label, value in rows:
        print(f"{label} : {value}")
    print("=" * 34 + "\n")