Yash Nagraj committed on
Commit
275907d
·
1 Parent(s): 89e0ef4

Add the training scripts for cloud training

Browse files
Files changed (5) hide show
  1. Discriminators.py +0 -0
  2. Generators.py +0 -0
  3. models.py +121 -0
  4. train.py +102 -0
  5. utils.py +109 -0
Discriminators.py DELETED
File without changes
Generators.py DELETED
File without changes
models.py ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import torch.nn as nn
3
class ResidualBlock(nn.Module):
    """Residual block: two reflect-padded 3x3 convs with InstanceNorm and
    ReLU, plus a skip connection. Channel count and spatial size are
    preserved, so the input can be added back to the conv output."""

    def __init__(self, input_channels) -> None:
        super(ResidualBlock, self).__init__()
        self.conv1 = nn.Conv2d(input_channels, input_channels, 3, 1, padding=1, padding_mode='reflect')
        self.conv2 = nn.Conv2d(input_channels, input_channels, 3, 1, padding=1, padding_mode='reflect')
        self.instanceNorm = nn.InstanceNorm2d(input_channels)
        self.activation = nn.ReLU()

    def forward(self, x):
        # BUG FIX: torch.Tensor has no .copy() (that is a numpy method), so
        # `x.copy()` raised AttributeError. A plain reference is enough here
        # because the convs do not modify x in place.
        original = x
        x = self.conv1(x)
        x = self.instanceNorm(x)
        x = self.activation(x)
        x = self.conv2(x)
        x = self.instanceNorm(x)
        return original + x
19
+
20
+
21
+
22
class ContractingBlock(nn.Module):
    """Downsampling block: a stride-2 reflect-padded conv that doubles the
    channel count, optional InstanceNorm, then ReLU (or LeakyReLU(0.2) when
    activation='lrelu', as used by the discriminator)."""

    def __init__(self, input_channels, use_bn=True, kernel_size=3, activation='relu') -> None:
        super(ContractingBlock, self).__init__()
        self.conv1 = nn.Conv2d(input_channels, input_channels * 2, kernel_size, padding=1, stride=2, padding_mode='reflect')
        self.activation = nn.ReLU() if activation == 'relu' else nn.LeakyReLU(0.2)
        if use_bn:
            # BUG FIX: the conv doubles the channels, so the norm must be
            # declared over input_channels * 2 (was input_channels).
            self.normalization = nn.InstanceNorm2d(input_channels * 2)
        self.use_bn = use_bn

    def forward(self, x):
        x = self.conv1(x)
        if self.use_bn:
            # BUG FIX: the normalized tensor was computed and discarded;
            # assign it back so normalization actually takes effect.
            x = self.normalization(x)
        x = self.activation(x)
        return x
37
+
38
+
39
class ExpandingBlock(nn.Module):
    """Upsampling block: a transposed 3x3 conv (stride 2) that halves the
    channel count and doubles the spatial size, optional InstanceNorm, then
    ReLU."""

    def __init__(self, input_channels, use_bn=True) -> None:
        super(ExpandingBlock, self).__init__()
        self.use_bn = use_bn
        self.conv1 = nn.ConvTranspose2d(
            input_channels,
            input_channels // 2,
            kernel_size=3,
            stride=2,
            padding=1,
            output_padding=1,
        )
        self.activation = nn.ReLU()
        if use_bn:
            self.normalization = nn.InstanceNorm2d(input_channels // 2)

    def forward(self, x):
        out = self.conv1(x)
        if self.use_bn:
            out = self.normalization(out)
        return self.activation(out)
54
+
55
+
56
+
57
class FeatureMapBlock(nn.Module):
    """Maps between channel spaces with a single 7x7 reflect-padded conv;
    spatial dimensions are unchanged."""

    def __init__(self, input_channels, output_channels) -> None:
        super(FeatureMapBlock, self).__init__()
        self.conv = nn.Conv2d(
            input_channels,
            output_channels,
            kernel_size=7,
            padding=3,
            padding_mode='reflect',
        )

    def forward(self, x):
        return self.conv(x)
65
+
66
class Generator(nn.Module):
    """CycleGAN generator: 7x7 feature map in, two contracting blocks, nine
    residual blocks, two expanding blocks, 7x7 feature map out, then tanh.
    Translates input_channels-channel images to output_channels channels at
    the same spatial resolution."""

    def __init__(self, input_channels, output_channels, hidden_dim=64) -> None:
        super(Generator, self).__init__()
        self.upfeature = FeatureMapBlock(input_channels, hidden_dim)
        self.contract1 = ContractingBlock(hidden_dim)
        self.contract2 = ContractingBlock(hidden_dim * 2)
        # Channels have doubled twice by the residual stage.
        res_mult = 4
        for i in range(9):
            setattr(self, f'res{i}', ResidualBlock(hidden_dim * res_mult))
        self.expand1 = ExpandingBlock(hidden_dim * res_mult)
        self.expand2 = ExpandingBlock(hidden_dim * 2)
        self.downfeature = FeatureMapBlock(hidden_dim, output_channels)
        self.tanh = nn.Tanh()

    def forward(self, x):
        out = self.upfeature(x)
        out = self.contract1(out)
        out = self.contract2(out)
        for i in range(9):
            out = getattr(self, f'res{i}')(out)
        out = self.expand1(out)
        out = self.expand2(out)
        return self.tanh(self.downfeature(out))
104
+
105
+
106
class Discriminator(nn.Module):
    """PatchGAN-style discriminator: 7x7 feature map, three stride-2
    contracting blocks with LeakyReLU (the first without normalization),
    then a 1x1 conv producing a one-channel map of patch logits."""

    def __init__(self, input_channels, hidden_channels=64) -> None:
        super(Discriminator, self).__init__()
        self.upfeature = FeatureMapBlock(input_channels, hidden_channels)
        self.contract1 = ContractingBlock(hidden_channels, False, kernel_size=4, activation='lrelu')
        self.contract2 = ContractingBlock(hidden_channels * 2, kernel_size=4, activation='lrelu')
        self.contract3 = ContractingBlock(hidden_channels * 4, kernel_size=4, activation='lrelu')
        self.conv = nn.Conv2d(hidden_channels * 8, 1, kernel_size=1)

    def forward(self, x):
        out = self.upfeature(x)
        for stage in (self.contract1, self.contract2, self.contract3):
            out = stage(out)
        return self.conv(out)
train.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# BUG FIX: `torch` was only in scope via the `from utils import *` wildcard;
# import it explicitly since this module uses torch.optim / torch.save.
import torch
import torch.nn as nn
from torchvision import transforms
from utils import *
from models import Generator, Discriminator
from tqdm.auto import tqdm

# Losses: LSGAN-style MSE for the adversarial terms, L1 for identity/cycle.
adv_criterion = nn.MSELoss()
recon_criterion = nn.L1Loss()

# Training hyperparameters.
n_epochs = 60
dim_A = 3            # channels of domain-A images
dim_B = 3            # channels of domain-B images
display_step = 200   # steps between logging / checkpointing
batch_size = 1
lr = 0.0002
load_shape = 286     # resize target before the random crop
target_shape = 256   # final training resolution
# BUG FIX: was hard-coded 'cuda', which crashes on CPU-only machines.
device = 'cuda' if torch.cuda.is_available() else 'cpu'


transform = transforms.Compose([
    transforms.Resize(load_shape),
    transforms.RandomCrop(target_shape),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
])

dataset = ImageDataset("horse2zebra", transform=transform)

# Both generators share a single optimizer, as in the CycleGAN paper.
gen_AB = Generator(dim_A, dim_B).to(device)
gen_BA = Generator(dim_B, dim_A).to(device)
gen_opt = torch.optim.Adam(list(gen_AB.parameters()) + list(gen_BA.parameters()), lr=lr, betas=(0.5, 0.999))
disc_A = Discriminator(dim_A).to(device)
disc_A_opt = torch.optim.Adam(disc_A.parameters(), lr=lr, betas=(0.5, 0.999))
disc_B = Discriminator(dim_B).to(device)
disc_B_opt = torch.optim.Adam(disc_B.parameters(), lr=lr, betas=(0.5, 0.999))


gen_AB = gen_AB.apply(weights_init)
gen_BA = gen_BA.apply(weights_init)
disc_A = disc_A.apply(weights_init)
disc_B = disc_B.apply(weights_init)
43
+
44
+
45
+
46
def train():
    """Run CycleGAN training: per batch, update disc_A and disc_B on real
    vs. generated images, then update both generators jointly; log losses
    and save a full checkpoint every `display_step` steps."""
    mean_gen_loss = 0
    mean_disc_loss = 0
    dataloader = DataLoader(dataset, batch_size, shuffle=True)
    cur_step = 0

    for epoch in range(n_epochs):
        for real_A, real_B in tqdm(dataloader):
            real_A = nn.functional.interpolate(real_A, size=target_shape)
            real_B = nn.functional.interpolate(real_B, size=target_shape)
            real_A = real_A.to(device)
            real_B = real_B.to(device)

            # --- Discriminator A: real A vs. A-domain fakes. ---
            disc_A_opt.zero_grad()
            with torch.no_grad():
                # BUG FIX: gen_BA maps B -> A, so fake A images must come
                # from real_B (was gen_BA(real_A)).
                fake_A = gen_BA(real_B)
            disc_A_loss = get_disc_loss(real_A, fake_A, disc_A, adv_criterion)
            disc_A_loss.backward(retain_graph=True)
            disc_A_opt.step()

            # --- Discriminator B: real B vs. B-domain fakes. ---
            disc_B_opt.zero_grad()
            with torch.no_grad():
                # BUG FIX: gen_AB maps A -> B, so fake B images must come
                # from real_A (was gen_AB(real_B)).
                fake_B = gen_AB(real_A)
            disc_B_loss = get_disc_loss(real_B, fake_B, disc_B, adv_criterion)
            disc_B_loss.backward(retain_graph=True)
            disc_B_opt.step()

            # --- Generators (both directions, one optimizer). ---
            gen_opt.zero_grad()
            # BUG FIX: `adv_criterion=` had no value — a syntax error that
            # made the whole module unimportable.
            gen_loss, fake_A, fake_B = get_gen_loss(
                real_A, real_B, gen_AB, gen_BA, disc_B, disc_A,
                adv_criterion=adv_criterion,
                identity_criterion=recon_criterion,
                cycle_criterion=recon_criterion,
            )
            gen_loss.backward()
            gen_opt.step()

            mean_gen_loss += gen_loss.item() / display_step
            # NOTE(review): only disc_A's loss is tracked; averaging in
            # disc_B_loss as well may be intended — confirm.
            mean_disc_loss += disc_A_loss.item() / display_step

            if cur_step % display_step == 0 and cur_step > 0:
                print(f"Epoch: {epoch} | Step: {cur_step} | Gen_loss: {mean_gen_loss} | Disc_loss: {mean_disc_loss} |")
                show_tensor_images(torch.cat([real_A, real_B]), size=(dim_A, target_shape, target_shape))
                show_tensor_images(torch.cat([fake_A, fake_B]), size=(dim_B, target_shape, target_shape))
                mean_gen_loss = 0
                mean_disc_loss = 0
                torch.save({
                    'gen_AB': gen_AB,
                    'gen_BA': gen_BA,
                    'gen_opt': gen_opt,
                    'disc_A': disc_A,
                    'disc_A_opt': disc_A_opt,
                    'disc_B': disc_B,
                    'disc_B_opt': disc_B_opt
                }, f"checkpoints/cycleGAN_{cur_step}.pth")

            cur_step += 1

if __name__ == "__main__":
    train()
utils.py ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from torchvision import transforms
2
+ import torch
3
+ import torch.nn as nn
4
+ from torchvision.utils import make_grid
5
+ from torch.utils.data import DataLoader
6
+ import matplotlib.pyplot as plt
7
+ import glob
8
+ import os
9
+ from torch.utils.data import Dataset
10
+ from PIL import Image
11
+
12
def show_tensor_images(image_tensor, num_images=25, size=(1, 28, 28)):
    '''
    Visualize a batch of images: rescale from [-1, 1] to [0, 1], arrange up
    to num_images of them in a grid of 5 per row, and display the plot.
    '''
    rescaled = (image_tensor + 1) / 2
    flat_images = rescaled.detach().cpu().view(-1, *size)
    grid = make_grid(flat_images[:num_images], nrow=5)
    plt.imshow(grid.permute(1, 2, 0).squeeze())
    plt.show()
23
+
24
+
25
class ImageDataset(Dataset):
    """Unpaired two-domain dataset over `<root>/<mode>A` and `<root>/<mode>B`.

    files_A is kept as the smaller side (the lists are swapped if needed),
    and B images are matched to A images through a random permutation that
    is refreshed once per pass. Items are returned rescaled to [-1, 1],
    with single-channel images expanded to 3 channels.
    """

    def __init__(self, root, transform=None, mode='train'):
        self.transform = transform
        self.files_A = sorted(glob.glob(os.path.join(root, '%sA' % mode) + '/*.*'))
        self.files_B = sorted(glob.glob(os.path.join(root, '%sB' % mode) + '/*.*'))
        if len(self.files_A) > len(self.files_B):
            self.files_A, self.files_B = self.files_B, self.files_A
        self.new_perm()
        assert len(self.files_A) > 0, "Make sure you downloaded the horse2zebra images!"

    def new_perm(self):
        # Draw a fresh random pairing of B files onto A indices.
        self.randperm = torch.randperm(len(self.files_B))[:len(self.files_A)]

    def __getitem__(self, index):
        path_A = self.files_A[index % len(self.files_A)]
        path_B = self.files_B[self.randperm[index]]
        item_A = self.transform(Image.open(path_A))
        item_B = self.transform(Image.open(path_B))
        if item_A.shape[0] != 3:
            item_A = item_A.repeat(3, 1, 1)
        if item_B.shape[0] != 3:
            item_B = item_B.repeat(3, 1, 1)
        if index == len(self) - 1:
            self.new_perm()
        # Old versions of PyTorch didn't support normalization for
        # different-channeled images, so rescale to [-1, 1] by hand.
        return (item_A - 0.5) * 2, (item_B - 0.5) * 2

    def __len__(self):
        return min(len(self.files_A), len(self.files_B))
52
+
53
+
54
def weights_init(m):
    """Initialize GAN layer weights in place (use via ``model.apply(weights_init)``).

    Conv / transposed-conv weights are drawn from N(0, 0.02) — the standard
    DCGAN/CycleGAN initialization; BatchNorm weights likewise, with bias 0.
    """
    if isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d):
        # BUG FIX: was normal_(m.weight, 1.0, 0.2) — a mean-1, std-0.2 draw,
        # an order of magnitude off the intended N(0, 0.02) init.
        torch.nn.init.normal_(m.weight, 0.0, 0.02)
    if isinstance(m, nn.BatchNorm2d):
        torch.nn.init.normal_(m.weight, 0.0, 0.02)
        torch.nn.init.constant_(m.bias, 0)
60
+
61
+
62
def get_disc_loss(real_X, fake_X, disc_X, adv_criterion):
    """Discriminator loss: push predictions on real images toward 1 and on
    (detached) fake images toward 0, averaging the two terms.

    real_X / fake_X: batches of real and generated images for domain X.
    disc_X: the discriminator; adv_criterion: e.g. MSELoss (LSGAN).
    """
    real_pred = disc_X(real_X.detach())
    disc_real_loss = adv_criterion(real_pred, torch.ones_like(real_pred))
    # BUG FIX: `fake_X.deatch()` raised AttributeError (typo), and the fake
    # *prediction* was detached, which cut the gradient path to the
    # discriminator for the fake term. Detach the image, not the prediction.
    fake_pred = disc_X(fake_X.detach())
    disc_fake_loss = adv_criterion(fake_pred, torch.zeros_like(fake_pred))
    disc_loss = (disc_real_loss + disc_fake_loss) / 2
    return disc_loss
69
+
70
+
71
def get_gen_adversarial_loss(real_X, disc_Y, gen_XY, adv_criterion):
    """Adversarial generator loss: gen_XY translates real_X into domain Y
    and is rewarded when disc_Y scores the result as real (target = 1).
    Returns the loss together with the generated fake_Y batch.
    """
    fake_Y = gen_XY(real_X.detach())
    fake_Y_pred = disc_Y(fake_Y)
    adversarial_loss = adv_criterion(fake_Y_pred, torch.ones_like(fake_Y_pred))
    return adversarial_loss, fake_Y
76
+
77
def get_identity_loss(real_X, gen_YX, identity_criterion):
    """Identity loss: feeding gen_YX an image already in its target domain X
    should return it unchanged. Returns the loss and the mapped batch.
    """
    identity_X = gen_YX(real_X)
    return identity_criterion(identity_X, real_X), identity_X
81
+
82
+
83
+
84
def get_cycle_consistency_loss(real_X, fake_Y, gen_YX, cycle_criterion):
    """Cycle-consistency loss: translating fake_Y back with gen_YX should
    reconstruct real_X. Returns the loss and the reconstructed batch.
    """
    cycle_X = gen_YX(fake_Y)
    return cycle_criterion(cycle_X, real_X), cycle_X
88
+
89
+
90
+
91
def get_gen_loss(real_A, real_B, gen_AB, gen_BA, disc_B, disc_A, adv_criterion, cycle_criterion, identity_criterion, lambda_identity=0.2, lambda_cycle=10):
    """Total CycleGAN generator objective for both translation directions.

    Combines adversarial, identity, and cycle-consistency terms as
    ``lambda_identity * identity + lambda_cycle * cycle + adversarial``.
    Returns the scalar loss together with fake_A and fake_B.
    """
    # Adversarial terms, one per direction (B->A judged by disc_A, A->B by disc_B).
    adv_loss_BA, fake_A = get_gen_adversarial_loss(real_B, disc_A, gen_BA, adv_criterion)
    adv_loss_AB, fake_B = get_gen_adversarial_loss(real_A, disc_B, gen_AB, adv_criterion)
    gen_adversarial_loss = adv_loss_BA + adv_loss_AB

    # Identity terms: each generator should leave its own target domain alone.
    identity_loss_A, _identity_A = get_identity_loss(real_A, gen_BA, identity_criterion)
    identity_loss_B, _identity_B = get_identity_loss(real_B, gen_AB, identity_criterion)
    gen_identity_loss = identity_loss_A + identity_loss_B

    # Cycle terms: A -> B -> A and B -> A -> B round trips.
    cycle_loss_BA, _cycle_A = get_cycle_consistency_loss(real_A, fake_B, gen_BA, cycle_criterion)
    cycle_loss_AB, _cycle_B = get_cycle_consistency_loss(real_B, fake_A, gen_AB, cycle_criterion)
    gen_cycle_loss = cycle_loss_BA + cycle_loss_AB

    # Weighted total (same summation order as before).
    gen_loss = lambda_identity * gen_identity_loss + lambda_cycle * gen_cycle_loss + gen_adversarial_loss

    return gen_loss, fake_A, fake_B