# Source: Hugging Face upload by triumphh77 ("Upload 13 files", commit f9a156f verified).
# The original page-header text was not valid Python; kept here as a comment.
import torch
import torch.nn as nn
# Simple DCGAN-style architecture for generating word images (1x32x1024)
class Generator(nn.Module):
    """DCGAN-style generator mapping a latent vector to a 1x32x1024 word image.

    Args:
        latent_dim: Size of the input noise vector z.
        channels: Number of output image channels.
    """

    def __init__(self, latent_dim=100, channels=1):
        super(Generator, self).__init__()
        # Project the latent vector to an initial 128-channel 4x128 feature
        # map, then upsample three times (x2 each) to reach 32x1024.
        self.init_size_h = 4
        self.init_size_w = 128
        self.l1 = nn.Sequential(nn.Linear(latent_dim, 128 * self.init_size_h * self.init_size_w))
        self.conv_blocks = nn.Sequential(
            nn.BatchNorm2d(128),
            nn.Upsample(scale_factor=2),  # 8x256
            nn.Conv2d(128, 128, 3, stride=1, padding=1),
            # BUG FIX: the original passed 0.8 positionally, which binds to
            # `eps` (numerical-stability constant, default 1e-5), not to
            # `momentum`. An eps of 0.8 badly distorts the normalization;
            # the intended DCGAN setting is momentum=0.8.
            nn.BatchNorm2d(128, momentum=0.8),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Upsample(scale_factor=2),  # 16x512
            nn.Conv2d(128, 64, 3, stride=1, padding=1),
            nn.BatchNorm2d(64, momentum=0.8),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Upsample(scale_factor=2),  # 32x1024
            nn.Conv2d(64, channels, 3, stride=1, padding=1),
            nn.Tanh(),  # Output in [-1, 1]
        )

    def forward(self, z):
        """Generate images from latent vectors z of shape (N, latent_dim);
        returns a tensor of shape (N, channels, 32, 1024)."""
        out = self.l1(z)
        out = out.view(out.shape[0], 128, self.init_size_h, self.init_size_w)
        img = self.conv_blocks(out)
        return img
class Discriminator(nn.Module):
    """DCGAN-style discriminator scoring 1x32x1024 word images as real/fake.

    Args:
        channels: Number of input image channels.
    """

    def __init__(self, channels=1):
        super(Discriminator, self).__init__()

        def discriminator_block(in_filters, out_filters, bn=True):
            # Stride-2 conv halves both spatial dims; Dropout2d regularizes.
            block = [
                nn.Conv2d(in_filters, out_filters, 3, 2, 1),
                nn.LeakyReLU(0.2, inplace=True),
                nn.Dropout2d(0.25),
            ]
            if bn:
                # BUG FIX: the original passed 0.8 positionally, which binds
                # to `eps` (default 1e-5), not to `momentum`; the intended
                # DCGAN setting is momentum=0.8.
                block.append(nn.BatchNorm2d(out_filters, momentum=0.8))
            return block

        self.model = nn.Sequential(
            *discriminator_block(channels, 16, bn=False),  # 16x512
            *discriminator_block(16, 32),  # 8x256
            *discriminator_block(32, 64),  # 4x128
            *discriminator_block(64, 128),  # 2x64
        )
        # Spatial size after four stride-2 convolutions (each halves h and w).
        ds_size_h = 32 // 2**4
        ds_size_w = 1024 // 2**4
        self.adv_layer = nn.Sequential(nn.Linear(128 * ds_size_h * ds_size_w, 1), nn.Sigmoid())

    def forward(self, img):
        """Return a (N, 1) validity score in (0, 1) for a batch of images
        of shape (N, channels, 32, 1024)."""
        out = self.model(img)
        out = out.view(out.shape[0], -1)
        validity = self.adv_layer(out)
        return validity
if __name__ == "__main__":
    # Smoke test: push one latent vector through both networks and report
    # the resulting tensor shapes.
    noise = torch.randn(1, 100)
    generator = Generator()
    discriminator = Discriminator()
    generated = generator(noise)
    score = discriminator(generated)
    print(f"Generator output shape: {generated.shape}")
    print(f"Discriminator output shape: {score.shape}")