Spaces:

koesan
/

Manga-Panel-LayoutGAN

Running

App Files Files Community

koesan commited on Oct 13, 2025

Commit

66a1d29

1 Parent(s): 513de3e

Initial commit: Manga Layout Generator with model

Browse files

Files changed (11) hide show

.gitattributes +1 -0
app.py +229 -0
model/__pycache__/layoutganpp.cpython-38.pyc +0 -0
model/__pycache__/layoutnet.cpython-38.pyc +0 -0
model/__pycache__/util.cpython-38.pyc +0 -0
model/layoutganpp.py +93 -0
model/layoutnet.py +64 -0
model/util.py +35 -0
model_best.pth.tar +3 -0
requirements.txt +6 -0
util.py +104 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.pth.tar filter=lfs diff=lfs merge=lfs -text

app.py ADDED Viewed

	@@ -0,0 +1,229 @@

+import os
+import gradio as gr
+import torch
+import numpy as np
+from PIL import Image, ImageDraw
+import pickle
+# Model imports
+from model.layoutganpp import Generator
+from util import set_seed
+# Configuration
+# Run on the GPU when torch reports one as available, otherwise on the CPU.
+DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+# Relative checkpoint path, so it is resolved against the current working directory.
+MODEL_PATH = "model_best.pth.tar"
+# Load model
+def load_model():
+    """Load pretrained LayoutGAN++ model"""
+    if not os.path.exists(MODEL_PATH):
+        raise FileNotFoundError(f"Model file not found: {MODEL_PATH}. Please ensure model_best.pth.tar is in the same directory.")
+    checkpoint = torch.load(MODEL_PATH, map_location=DEVICE)
+    args = checkpoint['args']
+    # Initialize model
+    num_label = 6  # For manga panels
+    model = Generator(args['latent_size'], num_label,
+                     d_model=args['G_d_model'],
+                     nhead=args['G_nhead']).to(DEVICE)
+    model.load_state_dict(checkpoint['model_state_dict'])
+    model.eval()
+    return model, args
+# Initialize model
+# Runs at import time: the generator is loaded once and then shared, via the
+# module-level `model` / `model_args` names, by every generation request.
+print("Loading model...")
+model, model_args = load_model()
+print("Model loaded successfully!")
+def convert_layout_to_image(bbox, canvas_size=(256, 256)):
+    """Convert bounding boxes to visualization image"""
+    W, H = canvas_size
+    img = Image.new('RGB', (W, H), color=(255, 255, 255))
+    draw = ImageDraw.Draw(img, 'RGBA')
+    # Colors for different panels
+    colors = [
+        (255, 100, 100, 180),  # Red
+        (100, 255, 100, 180),  # Green
+        (100, 100, 255, 180),  # Blue
+        (255, 255, 100, 180),  # Yellow
+        (255, 100, 255, 180),  # Magenta
+        (100, 255, 255, 180),  # Cyan
+        (255, 150, 100, 180),  # Orange
+        (150, 100, 255, 180),  # Purple
+        (100, 255, 150, 180),  # Light Green
+        (255, 100, 150, 180),  # Pink
+        (150, 255, 100, 180),  # Lime
+        (100, 150, 255, 180),  # Light Blue
+    ]
+    # Sort by area (largest first)
+    areas = [(b[2] - b[0]) * (b[3] - b[1]) for b in bbox]
+    indices = sorted(range(len(areas)), key=lambda i: areas[i], reverse=True)
+    for idx, i in enumerate(indices):
+        x1, y1, x2, y2 = bbox[i]
+        x1 = int(x1 * W)
+        y1 = int(y1 * H)
+        x2 = int(x2 * W)
+        y2 = int(y2 * H)
+        color = colors[idx % len(colors)]
+        draw.rectangle([x1, y1, x2, y2], fill=color, outline=(0, 0, 0), width=2)
+        # Add panel number
+        text = f"Panel {idx + 1}"
+        text_bbox = draw.textbbox((0, 0), text)
+        text_w = text_bbox[2] - text_bbox[0]
+        text_h = text_bbox[3] - text_bbox[1]
+        text_x = x1 + (x2 - x1 - text_w) // 2
+        text_y = y1 + (y2 - y1 - text_h) // 2
+        draw.text((text_x, text_y), text, fill=(0, 0, 0))
+    return img
+def xywh_to_ltrb(bbox):
+    """Convert from center format (xc, yc, w, h) to corners (x1, y1, x2, y2)"""
+    xc, yc, w, h = bbox
+    x1 = xc - w / 2
+    y1 = yc - h / 2
+    x2 = xc + w / 2
+    y2 = yc + h / 2
+    return [x1, y1, x2, y2]
+def generate_manga_layout(num_panels, seed=None):
+    """Generate manga panel layout"""
+    try:
+        if seed is not None:
+            set_seed(seed)
+        # Clamp num_panels
+        num_panels = max(1, min(12, num_panels))
+        # Create input
+        z = torch.randn(1, num_panels, model_args['latent_size'], device=DEVICE)
+        label = torch.zeros(1, num_panels, dtype=torch.long, device=DEVICE)
+        padding_mask = torch.zeros(1, num_panels, dtype=torch.bool, device=DEVICE)
+        # Generate layout
+        with torch.no_grad():
+            bbox = model(z, label, padding_mask)
+        # Convert to numpy
+        bbox = bbox[0].cpu().numpy()
+        # Convert from xywh to ltrb
+        bbox_ltrb = [xywh_to_ltrb(b) for b in bbox]
+        # Clip to [0, 1]
+        bbox_ltrb = [[max(0, min(1, coord)) for coord in box] for box in bbox_ltrb]
+        # Create visualization
+        img = convert_layout_to_image(bbox_ltrb, canvas_size=(512, 512))
+        info = f"✅ Generated layout with {num_panels} panels\n"
+        info += f"📐 Canvas size: 512x512px\n"
+        info += f"🎨 Panel colors are randomly assigned"
+        return img, info
+    except Exception as e:
+        error_img = Image.new('RGB', (512, 512), color=(255, 200, 200))
+        draw = ImageDraw.Draw(error_img)
+        draw.text((20, 250), f"Error: {str(e)}", fill=(255, 0, 0))
+        return error_img, f"❌ Error: {str(e)}"
+# Gradio Interface
+with gr.Blocks(theme=gr.themes.Soft(), title="Manga Panel Layout Generator") as demo:
+    gr.Markdown("""
+    # 🎨 Manga Panel Layout Generator
+    **AI-powered manga panel position prediction using LayoutGAN++**
+    This tool automatically generates optimal panel layouts for manga pages. Simply select the number of panels,
+    and the AI will predict the best arrangement based on training from thousands of manga pages.
+    ### ���� Links
+    - 📚 [GitHub Repository](https://github.com/koesan/Manga-Panel-LayoutGAN)
+    - 🤗 [Hugging Face Space](https://huggingface.co/spaces/koesan/manga-layout-generator)
+    - 📄 [LayoutGAN++ Paper](https://arxiv.org/abs/1908.07785)
+    ### 📖 About
+    This project uses **LayoutGAN++**, a transformer-based GAN architecture, trained on manga panel data
+    from the [MangaZero dataset](https://huggingface.co/datasets/jianzongwu/MangaZero). It predicts
+    panel positions and sizes to create aesthetically pleasing manga page layouts.
+    **Why?** Automating manga panel layout can help manga artists, designers, and AI systems generate
+    structured comic pages more efficiently.
+    """)
+    with gr.Row():
+        with gr.Column(scale=1):
+            num_panels = gr.Slider(
+                minimum=1,
+                maximum=12,
+                value=3,
+                step=1,
+                label="Number of Panels",
+                info="Select how many panels you want (1-12)"
+            )
+            seed = gr.Number(
+                label="Random Seed (Optional)",
+                value=None,
+                precision=0,
+                info="Leave empty for random generation"
+            )
+            generate_btn = gr.Button("🎨 Generate Layout", variant="primary", size="lg")
+            gr.Markdown("""
+            ### 💡 Tips
+            - **3-4 panels**: Simple, clean layouts
+            - **5-6 panels**: Standard manga page
+            - **7-12 panels**: Complex, dynamic layouts
+            Click generate multiple times for different variations!
+            """)
+        with gr.Column(scale=2):
+            output_image = gr.Image(label="Generated Layout", type="pil", height=512)
+            output_info = gr.Textbox(label="Generation Info", lines=3)
+    # Examples
+    gr.Examples(
+        examples=[
+            [3, None],
+            [6, None],
+            [8, None],
+            [4, 42],
+        ],
+        inputs=[num_panels, seed],
+        outputs=[output_image, output_info],
+        fn=generate_manga_layout,
+        cache_examples=False,
+        label="📋 Try These Examples"
+    )
+    # Event handler
+    generate_btn.click(
+        fn=generate_manga_layout,
+        inputs=[num_panels, seed],
+        outputs=[output_image, output_info]
+    )
+    gr.Markdown("""
+    ---
+    ### 🙏 Credits
+    - **Dataset**: [MangaZero](https://huggingface.co/datasets/jianzongwu/MangaZero) by [jianzongwu](https://github.com/jianzongwu)
+    - **Model**: [LayoutGAN++](https://arxiv.org/abs/1908.07785)
+    - **Framework**: PyTorch, Gradio, Hugging Face Spaces
+    Made with ❤️ for the manga and AI community
+    """)
+if __name__ == "__main__":
+    demo.launch()

model/__pycache__/layoutganpp.cpython-38.pyc ADDED Viewed

Binary file (2.72 kB). View file

model/__pycache__/layoutnet.cpython-38.pyc ADDED Viewed

Binary file (2.02 kB). View file

model/__pycache__/util.cpython-38.pyc ADDED Viewed

Binary file (1.18 kB). View file

model/layoutganpp.py ADDED Viewed

	@@ -0,0 +1,93 @@

+import torch
+import torch.nn as nn
+from model.util import TransformerWithToken
+class Generator(nn.Module):
+    """Transformer generator that maps per-element latents and labels to boxes.
+
+    Each element's latent vector and label embedding are projected to
+    ``d_model // 2`` features, concatenated into a ``d_model``-wide token,
+    run through a transformer encoder, and squashed by a sigmoid into four
+    values per element.
+    """
+    def __init__(self, dim_latent, num_label,
+                 d_model=512, nhead=8, num_layers=4):
+        super().__init__()
+        # Latent and label each contribute half of the d_model-wide token.
+        self.fc_z = nn.Linear(dim_latent, d_model // 2)
+        self.emb_label = nn.Embedding(num_label, d_model // 2)
+        self.fc_in = nn.Linear(d_model, d_model)
+        te = nn.TransformerEncoderLayer(d_model=d_model, nhead=nhead,
+                                        dim_feedforward=d_model // 2)
+        self.transformer = nn.TransformerEncoder(te, num_layers=num_layers)
+        # Four outputs per element.
+        self.fc_out = nn.Linear(d_model, 4)
+    def forward(self, z, label, padding_mask):
+        """Return sigmoid-activated box values, one 4-vector per element.
+
+        NOTE(review): the caller in app.py passes z as [B, N, dim_latent] and
+        label / padding_mask as [B, N], and reads the output as
+        (xc, yc, w, h); confirm against the training code.
+        """
+        z = self.fc_z(z)
+        l = self.emb_label(label)
+        x = torch.cat([z, l], dim=-1)
+        # Swap the first two axes before the transformer ...
+        x = torch.relu(self.fc_in(x)).permute(1, 0, 2)
+        x = self.transformer(x, src_key_padding_mask=padding_mask)
+        # ... and swap them back before the output projection.
+        x = self.fc_out(x.permute(1, 0, 2))
+        x = torch.sigmoid(x)
+        return x
+class Discriminator(nn.Module):
+    """Layout discriminator with an optional reconstruction decoder.
+
+    The encoder embeds boxes and labels, runs them through a
+    ``TransformerWithToken`` and scores the layout from the output at the
+    first sequence position. With ``reconst=True`` a decoder additionally
+    predicts the label logits and box of every non-padded element from that
+    same feature.
+    """
+    def __init__(self, num_label, d_model=512,
+                 nhead=8, num_layers=4, max_bbox=50):
+        super().__init__()
+        # encoder
+        self.emb_label = nn.Embedding(num_label, d_model)
+        self.fc_bbox = nn.Linear(4, d_model)
+        self.enc_fc_in = nn.Linear(d_model * 2, d_model)
+        self.enc_transformer = TransformerWithToken(d_model=d_model,
+                                                    dim_feedforward=d_model // 2,
+                                                    nhead=nhead, num_layers=num_layers)
+        self.fc_out_disc = nn.Linear(d_model, 1)
+        # decoder
+        # One learned token per element slot; only the first N are used, so
+        # the decoder path needs N <= max_bbox.
+        self.pos_token = nn.Parameter(torch.rand(max_bbox, 1, d_model))
+        self.dec_fc_in = nn.Linear(d_model * 2, d_model)
+        te = nn.TransformerEncoderLayer(d_model=d_model, nhead=nhead,
+                                        dim_feedforward=d_model // 2)
+        self.dec_transformer = nn.TransformerEncoder(te,
+                                                     num_layers=num_layers)
+        self.fc_out_cls = nn.Linear(d_model, num_label)
+        self.fc_out_bbox = nn.Linear(d_model, 4)
+    def forward(self, bbox, label, padding_mask, reconst=False):
+        """Score a batch of layouts and optionally reconstruct them.
+
+        Returns ``logit_disc`` alone when ``reconst`` is false, otherwise
+        ``(logit_disc, logit_cls, bbox_pred)`` where the last two cover only
+        the positions whose ``padding_mask`` entry is False.
+        """
+        B, N, _ = bbox.size()
+        b = self.fc_bbox(bbox)
+        l = self.emb_label(label)
+        x = self.enc_fc_in(torch.cat([b, l], dim=-1))
+        # [B, N, E] -> [N, B, E], the layout TransformerWithToken documents
+        x = torch.relu(x).permute(1, 0, 2)
+        x = self.enc_transformer(x, src_key_padding_mask=padding_mask)
+        # Position 0 is the token TransformerWithToken prepends to the sequence
+        x = x[0]
+        # logit_disc: [B,]
+        logit_disc = self.fc_out_disc(x).squeeze(-1)
+        if not reconst:
+            return logit_disc
+        else:
+            # Repeat the layout feature for each of the N positions and pair
+            # it with that position's learned token
+            x = x.unsqueeze(0).expand(N, -1, -1)
+            t = self.pos_token[:N].expand(-1, B, -1)
+            x = torch.cat([x, t], dim=-1)
+            x = torch.relu(self.dec_fc_in(x))
+            x = self.dec_transformer(x, src_key_padding_mask=padding_mask)
+            # Back to batch-first, then keep only the non-padded elements
+            x = x.permute(1, 0, 2)[~padding_mask]
+            # logit_cls: [M, L]    bbox_pred: [M, 4]
+            logit_cls = self.fc_out_cls(x)
+            bbox_pred = torch.sigmoid(self.fc_out_bbox(x))
+            return logit_disc, logit_cls, bbox_pred

model/layoutnet.py ADDED Viewed

	@@ -0,0 +1,64 @@

+import torch
+import torch.nn as nn
+from model.util import TransformerWithToken
+class LayoutNet(nn.Module):
+    """Fixed-size layout encoder/decoder exposing a per-layout feature.
+
+    The layer structure parallels ``Discriminator`` in model/layoutganpp.py,
+    but the sizes are hard-coded (d_model=256, nhead=4, num_layers=4,
+    max_bbox=50) and ``extract_features`` returns the encoder feature
+    directly.
+    """
+    def __init__(self, num_label):
+        super().__init__()
+        d_model = 256
+        nhead = 4
+        num_layers = 4
+        max_bbox = 50
+        # encoder
+        self.emb_label = nn.Embedding(num_label, d_model)
+        self.fc_bbox = nn.Linear(4, d_model)
+        self.enc_fc_in = nn.Linear(d_model * 2, d_model)
+        self.enc_transformer = TransformerWithToken(d_model=d_model,
+                                                    dim_feedforward=d_model // 2,
+                                                    nhead=nhead, num_layers=num_layers)
+        self.fc_out_disc = nn.Linear(d_model, 1)
+        # decoder
+        # One learned token per element slot; forward() uses the first N, so
+        # it needs N <= max_bbox.
+        self.pos_token = nn.Parameter(torch.rand(max_bbox, 1, d_model))
+        self.dec_fc_in = nn.Linear(d_model * 2, d_model)
+        te = nn.TransformerEncoderLayer(d_model=d_model, nhead=nhead,
+                                        dim_feedforward=d_model // 2)
+        self.dec_transformer = nn.TransformerEncoder(te, num_layers=num_layers)
+        self.fc_out_cls = nn.Linear(d_model, num_label)
+        self.fc_out_bbox = nn.Linear(d_model, 4)
+    def extract_features(self, bbox, label, padding_mask):
+        """Encode a batch of layouts into one feature vector per layout."""
+        b = self.fc_bbox(bbox)
+        l = self.emb_label(label)
+        x = self.enc_fc_in(torch.cat([b, l], dim=-1))
+        # [B, N, E] -> [N, B, E], the layout TransformerWithToken documents
+        x = torch.relu(x).permute(1, 0, 2)
+        x = self.enc_transformer(x, padding_mask)
+        # Position 0 is the token TransformerWithToken prepends to the sequence
+        return x[0]
+    def forward(self, bbox, label, padding_mask):
+        """Return ``(logit_disc, logit_cls, bbox_pred)`` for a batch of layouts.
+
+        ``logit_cls`` and ``bbox_pred`` cover only the positions whose
+        ``padding_mask`` entry is False.
+        """
+        B, N, _ = bbox.size()
+        x = self.extract_features(bbox, label, padding_mask)
+        logit_disc = self.fc_out_disc(x).squeeze(-1)
+        # Repeat the layout feature for each of the N positions and pair it
+        # with that position's learned token
+        x = x.unsqueeze(0).expand(N, -1, -1)
+        t = self.pos_token[:N].expand(-1, B, -1)
+        x = torch.cat([x, t], dim=-1)
+        x = torch.relu(self.dec_fc_in(x))
+        x = self.dec_transformer(x, src_key_padding_mask=padding_mask)
+        # Back to batch-first, then keep only the non-padded elements
+        x = x.permute(1, 0, 2)[~padding_mask]
+        # logit_cls: [M, L]    bbox_pred: [M, 4]
+        logit_cls = self.fc_out_cls(x)
+        bbox_pred = torch.sigmoid(self.fc_out_bbox(x))
+        return logit_disc, logit_cls, bbox_pred

model/util.py ADDED Viewed

	@@ -0,0 +1,35 @@

+import torch
+import torch.nn as nn
+class TransformerWithToken(nn.Module):
+    """Transformer encoder that prepends one learned token to every sequence.
+
+    The output keeps the extra position at index 0, so callers can read a
+    per-sequence summary from ``output[0]``.
+    """
+    def __init__(self, d_model, nhead, dim_feedforward, num_layers):
+        super().__init__()
+        self.token = nn.Parameter(torch.randn(1, 1, d_model))
+        # Mask entry for the prepended token: always False, i.e. never padding.
+        # Registered as a buffer so it moves with the module across devices.
+        token_mask = torch.zeros(1, 1, dtype=torch.bool)
+        self.register_buffer('token_mask', token_mask)
+        self.core = nn.TransformerEncoder(
+            nn.TransformerEncoderLayer(
+                d_model=d_model, nhead=nhead,
+                dim_feedforward=dim_feedforward,
+            ), num_layers=num_layers)
+    def forward(self, x, src_key_padding_mask):
+        """Encode ``x`` with the learned token prepended; returns [N + 1, B, E]."""
+        # x: [N, B, E]
+        # padding_mask: [B, N]
+        #   `False` for valid values
+        #   `True` for padded values
+        B = x.size(1)
+        # Broadcast the single learned token across the batch
+        token = self.token.expand(-1, B, -1)
+        x = torch.cat([token, x], dim=0)
+        # Extend the mask by one column to cover the prepended token
+        token_mask = self.token_mask.expand(B, -1)
+        padding_mask = torch.cat([token_mask, src_key_padding_mask], dim=1)
+        x = self.core(x, src_key_padding_mask=padding_mask)
+        return x

model_best.pth.tar ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3c7da885a431c1fcacedf9616eb9d43bc3ae4dee2eaf76b4ac531080c6932059
+size 99615356

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+gradio==4.19.2
+torch==1.8.1
+torchvision==0.9.1
+numpy==1.21.0
+Pillow==9.5.0
+scipy==1.7.3

util.py ADDED Viewed

	@@ -0,0 +1,104 @@

+import json
+import random
+import shutil
+import numpy as np
+from pathlib import Path
+from datetime import datetime
+from PIL import Image, ImageDraw
+import torch
+import torchvision.utils as vutils
+import torchvision.transforms as T
+def set_seed(seed):
+    random.seed(seed)
+    np.random.seed(seed)
+    torch.manual_seed(seed)
+    print("Random Seed:", seed)
+def init_experiment(args, prefix):
+    if args.seed is None:
+        args.seed = random.randint(0, 10000)
+    set_seed(args.seed)
+    if not args.name:
+        args.name = datetime.now().strftime('%Y%m%d%H%M%S%f')
+    out_dir = Path('output') / args.dataset / prefix / args.name
+    out_dir.mkdir(parents=True, exist_ok=True)
+    json_path = out_dir / 'args.json'
+    with json_path.open('w') as f:
+        json.dump(vars(args), f, indent=2)
+    return out_dir
+def save_checkpoint(state, is_best, out_dir):
+    out_path = Path(out_dir) / 'checkpoint.pth.tar'
+    torch.save(state, out_path)
+    if is_best:
+        best_path = Path(out_dir) / 'model_best.pth.tar'
+        shutil.copyfile(out_path, best_path)
+def convert_xywh_to_ltrb(bbox):
+    xc, yc, w, h = bbox
+    x1 = xc - w / 2
+    y1 = yc - h / 2
+    x2 = xc + w / 2
+    y2 = yc + h / 2
+    return [x1, y1, x2, y2]
+def convert_layout_to_image(boxes, labels, colors, canvas_size):
+    H, W = canvas_size
+    img = Image.new('RGB', (int(W), int(H)), color=(255, 255, 255))
+    draw = ImageDraw.Draw(img, 'RGBA')
+    # draw from larger boxes
+    area = [b[2] * b[3] for b in boxes]
+    indices = sorted(range(len(area)),
+                     key=lambda i: area[i],
+                     reverse=True)
+    for i in indices:
+        bbox, color = boxes[i], colors[labels[i]]
+        c_fill = color + (100,)
+        x1, y1, x2, y2 = convert_xywh_to_ltrb(bbox)
+        x1, x2 = x1 * (W - 1), x2 * (W - 1)
+        y1, y2 = y1 * (H - 1), y2 * (H - 1)
+        draw.rectangle([x1, y1, x2, y2],
+                       outline=color,
+                       fill=c_fill)
+    return img
+def save_image(batch_boxes, batch_labels, batch_mask,
+               dataset_colors, out_path, canvas_size=(60, 40),
+               nrow=None):
+    # batch_boxes: [B, N, 4]
+    # batch_labels: [B, N]
+    # batch_mask: [B, N]
+    imgs = []
+    B = batch_boxes.size(0)
+    to_tensor = T.ToTensor()
+    for i in range(B):
+        mask_i = batch_mask[i]
+        boxes = batch_boxes[i][mask_i]
+        labels = batch_labels[i][mask_i]
+        img = convert_layout_to_image(boxes, labels,
+                                      dataset_colors,
+                                      canvas_size)
+        imgs.append(to_tensor(img))
+    image = torch.stack(imgs)
+    if nrow is None:
+        nrow = int(np.ceil(np.sqrt(B)))
+    vutils.save_image(image, out_path, normalize=False, nrow=nrow)