Spaces:
Runtime error
Runtime error
| """ | |
| CycleGAN Domain Transfer — Demonstrate unpaired image-to-image translation | |
| Course: 410 Sim-to-Real ch4 | |
| Defines a ResNet-based CycleGAN generator (the standard architecture). | |
| NOTE: this file does not download or load pretrained weights from HF Hub; | |
| the demo always uses the simple algorithmic style transfer below to illustrate the idea. | |
| """ | |
| import os | |
| import numpy as np | |
| import torch | |
| import torch.nn as nn | |
| import gradio as gr | |
| from PIL import Image | |
| import torchvision.transforms as T | |
| # --------------------------------------------------------------------------- | |
| # ResNet Generator (standard CycleGAN architecture) | |
| # --------------------------------------------------------------------------- | |
class ResidualBlock(nn.Module):
    """Residual unit: two reflection-padded 3x3 convolutions with a skip connection.

    Each convolution is followed by instance normalisation, with a ReLU between
    the two halves. Reflection padding of 1 combined with a 3x3 kernel keeps the
    spatial size unchanged, so the result can be added to the input.
    """

    def __init__(self, channels):
        super().__init__()
        first_half = [
            nn.ReflectionPad2d(1),
            nn.Conv2d(channels, channels, kernel_size=3),
            nn.InstanceNorm2d(channels),
            nn.ReLU(inplace=True),
        ]
        second_half = [
            nn.ReflectionPad2d(1),
            nn.Conv2d(channels, channels, kernel_size=3),
            nn.InstanceNorm2d(channels),
        ]
        # One flat Sequential named ``block`` keeps the parameter names
        # (block.1.*, block.5.*) stable.
        self.block = nn.Sequential(*first_half, *second_half)

    def forward(self, x):
        """Return ``x`` plus the output of the convolutional branch."""
        branch = self.block(x)
        return x + branch
class CycleGANGenerator(nn.Module):
    """ResNet-style image-to-image generator.

    Layout: a 7x7 stem convolution, two stride-2 downsampling convolutions
    (each doubling the channel count), ``n_residual`` residual blocks at
    ``4 * n_features`` channels, two stride-2 transposed convolutions (each
    halving the channel count), and a 7x7 output convolution followed by
    ``tanh``.

    Args:
        in_channels: Channels of the input image.
        out_channels: Channels of the generated image.
        n_residual: Number of residual blocks in the bottleneck.
        n_features: Channel width after the stem convolution.
    """

    def __init__(self, in_channels=3, out_channels=3, n_residual=9, n_features=64):
        super().__init__()
        narrow = n_features
        medium = n_features * 2
        wide = n_features * 4

        # Stem: reflection padding of 3 offsets the 7x7 kernel.
        stages = [
            nn.ReflectionPad2d(3),
            nn.Conv2d(in_channels, narrow, kernel_size=7),
            nn.InstanceNorm2d(narrow),
            nn.ReLU(inplace=True),
        ]

        # Encoder: each step halves the resolution and doubles the channels.
        for c_in, c_out in ((narrow, medium), (medium, wide)):
            stages.extend([
                nn.Conv2d(c_in, c_out, kernel_size=3, stride=2, padding=1),
                nn.InstanceNorm2d(c_out),
                nn.ReLU(inplace=True),
            ])

        # Bottleneck of residual blocks at the widest channel count.
        stages.extend(ResidualBlock(wide) for _ in range(n_residual))

        # Decoder: mirror of the encoder using transposed convolutions.
        for c_in, c_out in ((wide, medium), (medium, narrow)):
            stages.extend([
                nn.ConvTranspose2d(c_in, c_out, kernel_size=3,
                                   stride=2, padding=1, output_padding=1),
                nn.InstanceNorm2d(c_out),
                nn.ReLU(inplace=True),
            ])

        # Output head: tanh bounds the result to [-1, 1].
        stages.extend([
            nn.ReflectionPad2d(3),
            nn.Conv2d(narrow, out_channels, kernel_size=7),
            nn.Tanh(),
        ])

        self.model = nn.Sequential(*stages)

    def forward(self, x):
        """Run ``x`` through the full layer stack."""
        return self.model(x)
| # --------------------------------------------------------------------------- | |
| # Simple neural style transfer fallback (no pretrained weights needed) | |
| # --------------------------------------------------------------------------- | |
def _warm_real_look(img):
    """Sim-to-real look: warmer colour balance, faint noise, more contrast."""
    img[:, :, 0] = np.clip(img[:, :, 0] * 1.1 + 0.05, 0, 1)  # lift red
    img[:, :, 1] = np.clip(img[:, :, 1] * 1.0, 0, 1)  # green left as is
    img[:, :, 2] = np.clip(img[:, :, 2] * 0.9, 0, 1)  # pull blue down
    # A little gaussian grain stands in for "real" texture.
    grain = np.random.normal(0, 0.02, img.shape).astype(np.float32)
    img = np.clip(img + grain, 0, 1)
    # Stretch contrast slightly around mid-grey.
    return np.clip((img - 0.5) * 1.15 + 0.5, 0, 1)


def _flat_sim_look(img):
    """Real-to-sim look: blur away texture, quantize colours, cool the tint."""
    from PIL import ImageFilter

    as_pil = Image.fromarray((img * 255).astype(np.uint8))
    softened = as_pil.filter(ImageFilter.GaussianBlur(radius=1))
    img = np.array(softened).astype(np.float32) / 255.0
    # Snap every channel to multiples of 1/8 for a flat, rendered feel.
    img = np.round(img * 8) / 8
    img[:, :, 2] = np.clip(img[:, :, 2] * 1.1, 0, 1)  # lift blue
    img[:, :, 0] = np.clip(img[:, :, 0] * 0.95, 0, 1)  # pull red down
    return img


def _night_look(img):
    """Day-to-night look: darken, tint blue, scatter bright round "lights"."""
    img = img * 0.3
    img[:, :, 2] = np.clip(img[:, :, 2] * 1.8, 0, 0.5)
    h, w = img.shape[:2]
    # One light per 5000 pixels; draws happen in the order row, column, radius.
    for _ in range(int(h * w / 5000)):
        cy, cx = np.random.randint(0, h), np.random.randint(0, w)
        radius = np.random.randint(3, 8)
        yy, xx = np.ogrid[-radius:radius + 1, -radius:radius + 1]
        disc = xx**2 + yy**2 <= radius**2
        # Clip the disc's bounding box to the image, then brighten only the
        # disc pixels that fall inside it.
        y0, x0 = cy - radius, cx - radius
        top, bottom = max(y0, 0), min(cy + radius + 1, h)
        left, right = max(x0, 0), min(cx + radius + 1, w)
        visible = disc[top - y0:bottom - y0, left - x0:right - x0]
        patch = img[top:bottom, left:right]  # view: writes land in img
        patch[visible] = np.clip(patch[visible] + 0.5, 0, 1)
    return img


def _winter_look(img):
    """Summer-to-winter look: desaturate, brighten, add a blue tint."""
    gray = np.mean(img, axis=2, keepdims=True)
    img = img * 0.4 + gray * 0.6  # blend towards grey
    img = np.clip(img * 1.2 + 0.1, 0, 1)  # brighten
    img[:, :, 2] = np.clip(img[:, :, 2] * 1.2, 0, 1)  # blue tint
    return img


# Maps the style label chosen by the caller to the filter implementing it.
_STYLE_FILTERS = {
    "Sim β Real (warmer tones)": _warm_real_look,
    "Real β Sim (flatter tones)": _flat_sim_look,
    "Day β Night": _night_look,
    "Summer β Winter": _winter_look,
}


def simple_domain_transfer(image: np.ndarray, style: str) -> np.ndarray:
    """Apply a hand-written colour/texture filter that mimics a domain shift.

    Args:
        image: Pixel array with values on a 0-255 scale; the filters index
            channels 0, 1 and 2 on the third axis.
        style: Label selecting the filter. A label that matches none of the
            known styles leaves the pixels unfiltered.

    Returns:
        A new ``uint8`` array; ``image`` itself is not modified.
    """
    pixels = image.astype(np.float32) / 255.0
    apply_filter = _STYLE_FILTERS.get(style)
    if apply_filter is not None:
        pixels = apply_filter(pixels)
    return (pixels * 255).astype(np.uint8)
| # --------------------------------------------------------------------------- | |
| # Main function | |
| # --------------------------------------------------------------------------- | |
def transfer(image: Image.Image, style: str):
    """Run the selected demo style on an uploaded image.

    Args:
        image: The uploaded PIL image, or ``None`` when nothing was uploaded.
        style: Style label forwarded to ``simple_domain_transfer``.

    Returns:
        A tuple ``(original, transferred, info)``: the (possibly downscaled)
        RGB input as an array, the filtered array, and a Markdown summary.
        ``(None, None, "")`` when ``image`` is ``None``.
    """
    if image is None:
        return None, None, ""

    img = image.convert("RGB")

    # Downscale so the longest side is at most ``max_dim`` (for performance).
    max_dim = 512
    w, h = img.size
    longest = max(w, h)
    if longest > max_dim:
        scale = max_dim / longest
        # Clamp to 1 px: for very elongated images int() would truncate the
        # short side to 0, which is not a valid resize target.
        new_size = (max(1, int(w * scale)), max(1, int(h * scale)))
        img = img.resize(new_size, Image.LANCZOS)

    img_np = np.array(img)
    result_np = simple_domain_transfer(img_np, style)

    # CycleGAN is trained on *unpaired* image collections, so the note says so
    # (it previously said "paired", contradicting the key-concept sentence).
    info = (
        f"**Style: {style}**\n\n"
        f"Input size: {img_np.shape[1]}x{img_np.shape[0]}\n\n"
        f"*This demo uses algorithmic style transfer to illustrate the concept of domain transfer. "
        f"A full CycleGAN model trained on unpaired sim/real datasets would produce more realistic results.*\n\n"
        f"**Key concept**: CycleGAN learns to translate between two domains (e.g., simulation β reality) "
        f"without requiring paired examples β only a collection of images from each domain."
    )
    return img_np, result_np, info
# Example rows offered in the UI: (image path, style label).
_EXAMPLE_ROWS = [
    ["examples/sim_scene.jpg", "Sim β Real (warmer tones)"],
    ["examples/outdoor.jpg", "Day β Night"],
]

# Background text shown in the collapsible "About CycleGAN" section.
_ABOUT_CYCLEGAN_MD = """
**CycleGAN** (Zhu et al., 2017) enables unpaired image-to-image translation using:
1. **Two generators**: G_AB (domain AβB) and G_BA (domain BβA)
2. **Two discriminators**: D_A and D_B
3. **Cycle consistency loss**: If we translate AβBβA, we should get back the original
4. **Adversarial loss**: Generated images should fool the discriminator
In the Sim-to-Real context:
- Domain A = simulation renders (MuJoCo, CARLA, Unity)
- Domain B = real-world images
- The generator learns to make sim images look realistic
- This helps RL policies transfer from simulation to real robots
"""

with gr.Blocks(title="CycleGAN Domain Transfer") as demo:
    gr.Markdown(
        "# CycleGAN Domain Transfer\n"
        "Upload an image and see it transformed between visual domains.\n"
        "Demonstrates the concept of unpaired image-to-image translation.\n"
        "*Course: 410 Sim-to-Real ch4 β Domain Adaptation*"
    )
    with gr.Row():
        # Left column: inputs.
        with gr.Column(scale=1):
            input_image = gr.Image(type="pil", label="Upload Image")
            style = gr.Radio(
                [
                    "Sim β Real (warmer tones)",
                    "Real β Sim (flatter tones)",
                    "Day β Night",
                    "Summer β Winter",
                ],
                value="Sim β Real (warmer tones)",
                label="Transfer Direction",
            )
            btn = gr.Button("Transfer", variant="primary")
        # Right column: before/after images and the explanatory note.
        with gr.Column(scale=2):
            with gr.Row():
                orig_out = gr.Image(label="Original")
                result_out = gr.Image(label="Transferred")
            info_md = gr.Markdown()

    btn.click(transfer, [input_image, style], [orig_out, result_out, info_md])

    # Offer only examples whose image file is actually present. The paths are
    # hard-coded, and handing gr.Examples a missing file can fail while the
    # UI is being built, so absent examples are skipped instead.
    available_examples = [row for row in _EXAMPLE_ROWS if os.path.isfile(row[0])]
    if available_examples:
        gr.Examples(
            examples=available_examples,
            inputs=[input_image, style],
        )

    with gr.Accordion("About CycleGAN", open=False):
        gr.Markdown(_ABOUT_CYCLEGAN_MD)

if __name__ == "__main__":
    demo.launch()