undresser / gan.py
eoeooe's picture
Update gan.py
580137c verified
import os
import cv2
import numpy as np
from PIL import Image
import torch
import torchvision.transforms as transforms
import functools
# =============================================
# OPTIONS (minimal but complete for inference)
# =============================================
class Options:
def __init__(self):
# ================== PATHS ==================
self.dataroot = "" # not used in inference
self.checkpoints_dir = "checkpoints" # ← CHANGE THIS to your folder
self.model_name = "undress_net_G.pth" # ← your .pth file name
# ================== MODEL ==================
self.batchSize = 1
self.nThreads = 0
self.serial_batches = True
self.input_nc = 3
self.output_nc = 3
self.ngf = 64
self.netG = 'global' # or 'local' if you use local enhancer
self.n_downsample_global = 4
self.n_blocks_global = 9
self.n_local_enhancers = 1
self.n_blocks_local = 3
self.norm = 'instance'
self.gpu_ids = [] # [] = CPU, [0] = GPU 0, etc.
# ================== PRE/POST ==================
self.resize_or_crop = 'none' # we handle resize manually
self.loadSize = 512 # most undress pix2pixHD models were trained at 512x512
self.fineSize = 512
# =============================================
# DATASET & DATALOADER (your original + fixes)
# =============================================
class DataLoader:
def __init__(self, opt, cv_img):
self.dataset = Dataset()
self.dataset.initialize(opt, cv_img)
self.dataloader = torch.utils.data.DataLoader(
self.dataset,
batch_size=opt.batchSize,
shuffle=not opt.serial_batches,
num_workers=int(opt.nThreads))
def load_data(self):
return self.dataloader
def __len__(self):
return 1
class Dataset(torch.utils.data.Dataset):
def initialize(self, opt, cv_img):
self.opt = opt
self.A = Image.fromarray(cv2.cvtColor(cv_img, cv2.COLOR_BGR2RGB))
self.dataset_size = 1
def __getitem__(self, index):
transform_A = get_transform(self.opt)
A_tensor = transform_A(self.A.convert('RGB'))
input_dict = {
'label': A_tensor,
'inst': 0,
'image': 0,
'feat': 0,
'path': ""
}
return input_dict
def __len__(self):
return 1
# =============================================
# MODEL (your original + bug fixes + GPU/CPU safety)
# =============================================
class DeepModel(torch.nn.Module):
def initialize(self, opt):
self.opt = opt
self.gpu_ids = opt.gpu_ids
# Define generator
self.netG = self.__define_G(
opt.input_nc, opt.output_nc, opt.ngf, opt.netG,
opt.n_downsample_global, opt.n_blocks_global,
opt.n_local_enhancers, opt.n_blocks_local,
opt.norm, self.gpu_ids
)
# Load weights
self.__load_network(self.netG)
self.netG.eval()
def inference(self, label):
with torch.no_grad():
input_label, _, _, _ = self.__encode_input(label, infer=True)
fake_image = self.netG(input_label)
return fake_image
def __load_network(self, network):
save_path = os.path.join(self.opt.checkpoints_dir, self.opt.model_name)
if not os.path.isfile(save_path):
raise FileNotFoundError(f"Model not found: {save_path}\nPut your .pth file there!")
state_dict = torch.load(save_path, map_location='cpu' if len(self.gpu_ids) == 0 else f'cuda:{self.gpu_ids[0]}')
network.load_state_dict(state_dict)
def __encode_input(self, label_map, infer=False):
if len(self.gpu_ids) > 0:
input_label = label_map.data.cuda(self.gpu_ids[0])
else:
input_label = label_map.data
return input_label, None, None, None
def __define_G(self, input_nc, output_nc, ngf, netG, n_downsample_global, n_blocks_global,
n_local_enhancers, n_blocks_local, norm, gpu_ids):
norm_layer = self.__get_norm_layer(norm)
netG = GlobalGenerator(input_nc, output_nc, ngf, n_downsample_global, n_blocks_global, norm_layer)
if len(gpu_ids) > 0:
netG.cuda(gpu_ids[0])
return netG
def __get_norm_layer(self, norm_type='instance'):
if norm_type == 'instance':
norm_layer = functools.partial(torch.nn.InstanceNorm2d, affine=False)
elif norm_type == 'batch':
norm_layer = functools.partial(torch.nn.BatchNorm2d, affine=True)
else:
raise NotImplementedError(f'norm {norm_type} not supported')
return norm_layer
# Generator & ResnetBlock (your original - unchanged)
class GlobalGenerator(torch.nn.Module):
def __init__(self, input_nc, output_nc, ngf=64, n_downsampling=3, n_blocks=9,
norm_layer=torch.nn.BatchNorm2d, padding_type='reflect'):
assert n_blocks >= 0
super().__init__()
activation = torch.nn.ReLU(True)
model = [torch.nn.ReflectionPad2d(3),
torch.nn.Conv2d(input_nc, ngf, kernel_size=7, padding=0),
norm_layer(ngf), activation]
# downsample
for i in range(n_downsampling):
mult = 2 ** i
model += [torch.nn.Conv2d(ngf * mult, ngf * mult * 2, kernel_size=3, stride=2, padding=1),
norm_layer(ngf * mult * 2), activation]
# resnet blocks
mult = 2 ** n_downsampling
for i in range(n_blocks):
model += [ResnetBlock(ngf * mult, padding_type=padding_type, norm_layer=norm_layer, activation=activation)]
# upsample
for i in range(n_downsampling):
mult = 2 ** (n_downsampling - i)
model += [torch.nn.ConvTranspose2d(ngf * mult, int(ngf * mult / 2), kernel_size=3,
stride=2, padding=1, output_padding=1),
norm_layer(int(ngf * mult / 2)), activation]
model += [torch.nn.ReflectionPad2d(3),
torch.nn.Conv2d(ngf, output_nc, kernel_size=7, padding=0),
torch.nn.Tanh()]
self.model = torch.nn.Sequential(*model)
def forward(self, input):
return self.model(input)
class ResnetBlock(torch.nn.Module):
def __init__(self, dim, padding_type, norm_layer, activation=torch.nn.ReLU(True), use_dropout=False):
super().__init__()
self.conv_block = self.__build_conv_block(dim, padding_type, norm_layer, activation, use_dropout)
def __build_conv_block(self, dim, padding_type, norm_layer, activation, use_dropout):
conv_block = []
p = 0
if padding_type == 'reflect':
conv_block += [torch.nn.ReflectionPad2d(1)]
elif padding_type == 'replicate':
conv_block += [torch.nn.ReplicationPad2d(1)]
elif padding_type == 'zero':
p = 1
else:
raise NotImplementedError('padding [%s] is not implemented' % padding_type)
conv_block += [torch.nn.Conv2d(dim, dim, kernel_size=3, padding=p),
norm_layer(dim),
activation]
if use_dropout:
conv_block += [torch.nn.Dropout(0.5)]
p = 0
if padding_type == 'reflect':
conv_block += [torch.nn.ReflectionPad2d(1)]
elif padding_type == 'replicate':
conv_block += [torch.nn.ReplicationPad2d(1)]
elif padding_type == 'zero':
p = 1
conv_block += [torch.nn.Conv2d(dim, dim, kernel_size=3, padding=p),
norm_layer(dim)]
return torch.nn.Sequential(*conv_block)
def forward(self, x):
out = x + self.conv_block(x)
return out
# =============================================
# TRANSFORM (improved for undress)
# =============================================
def get_transform(opt, method=Image.BICUBIC):
transform_list = []
base = float(2 ** opt.n_downsample_global)
transform_list.append(transforms.Lambda(lambda img: __make_power_2(img, base, method)))
transform_list += [transforms.ToTensor(),
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]
return transforms.Compose(transform_list)
def __make_power_2(img, base, method=Image.BICUBIC):
ow, oh = img.size
h = int(round(oh / base) * base)
w = int(round(ow / base) * base)
if (h == oh) and (w == ow):
return img
return img.resize((w, h), method)
# =============================================
# TENSOR → IMAGE
# =============================================
def tensor2im(image_tensor, imtype=np.uint8):
image_numpy = image_tensor[0].cpu().float().numpy()
image_numpy = (np.transpose(image_numpy, (1, 2, 0)) + 1) / 2.0 * 255.0
image_numpy = np.clip(image_numpy, 0, 255).astype(imtype)
return image_numpy
# =============================================
# MAIN FUNCTION (ready to run)
# =============================================
def undress_image(image_path, output_path="undressed.png"):
# 1. Load image
cv_img = cv2.imread(image_path)
if cv_img is None:
raise FileNotFoundError(f"Cannot read image: {image_path}")
# 2. Options & Model
opt = Options()
model = DeepModel()
model.initialize(opt)
# 3. Prepare data
data_loader = DataLoader(opt, cv_img)
dataset = data_loader.load_data()
data = next(iter(dataset)) # only 1 image
# 4. Inference
input_tensor = data['label'].unsqueeze(0) # add batch dim
fake_image = model.inference(input_tensor)
# 5. Convert to image + post-process
output_np = tensor2im(fake_image)
output_np = cv2.cvtColor(output_np, cv2.COLOR_RGB2BGR)
# Simple post-process to reduce artifacts (helps a lot)
output_np = cv2.bilateralFilter(output_np, 9, 75, 75) # skin smoothing
output_np = cv2.detailEnhance(output_np, sigma_s=10, sigma_r=0.15) # sharpen
# 6. Save
cv2.imwrite(output_path, output_np)
print(f"✅ Undressed image saved to: {output_path}")
# =============================================
# RUN EXAMPLE
# ==