Efesasa0 commited on
Commit
b6b6742
·
1 Parent(s): 10bce0b
README.md CHANGED
@@ -1,6 +1,6 @@
1
  ---
2
  title: Sprite Generation
3
- emoji: 🌖
4
  colorFrom: blue
5
  colorTo: green
6
  sdk: gradio
@@ -10,4 +10,4 @@ pinned: false
10
  short_description: generation of game character sprites from trained weights
11
  ---
12
 
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
  title: Sprite Generation
3
+ emoji: 👾
4
  colorFrom: blue
5
  colorTo: green
6
  sdk: gradio
 
10
  short_description: generation of game character sprites from trained weights
11
  ---
12
 
13
+ Check out the configuration reference at <https://huggingface.co/docs/hub/spaces-config-reference>
app.py ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr
import torch
import numpy as np
import os
from src import *

# device setup
device = "cuda" if torch.cuda.is_available() else "cpu"

# diffusion constants
T = 3000              # number of diffusion steps; must match the trained checkpoints
beta_end = 0.02
beta_start = 1e-3
# Linear noise schedule from beta_start to beta_end. T+1 entries so that
# betas[t] can be indexed directly with timestep t in [1, T]; betas[0] is unused.
betas = (beta_end - beta_start) * torch.linspace(0, 1, T+1, device=device) + beta_start
alphas = 1 - betas
# Cumulative product of alphas, computed in log space: exp(cumsum(log a)) == cumprod(a).
alphas_hat = torch.cumsum(alphas.log(), dim=0).exp()
alphas_hat[0] = 1  # by convention no noise has been applied at t = 0
18
+
19
# -----------------------------
# Diffusion model wrapper
# -----------------------------
class Diffusion:
    """Wraps a trained ContextUnet and exposes DDPM / DDIM sampling.

    Relies on the module-level schedule tensors (T, betas, alphas, alphas_hat)
    and `device` defined above in this file.
    """

    def __init__(self, weights_path):
        # Architecture hyperparameters; must match the checkpoint being loaded.
        context_features = 5
        features = 256
        self.image_size = (16, 16)
        self.model = ContextUnet(
            in_channels=3,
            features=features,
            context_features=context_features,
            image_size=self.image_size
        ).to(device)
        # NOTE(review): torch.load unpickles arbitrary objects — only load
        # trusted .pth files (consider weights_only=True on newer torch).
        self.model.load_state_dict(torch.load(weights_path, map_location=device))
        self.model.eval()

    def denoise_add_noise(self, x, t, pred_noise, z=None):
        # One reverse DDPM step at integer timestep t: remove the predicted
        # noise from x, then re-inject fresh noise scaled by sqrt(beta_t).
        if z is None:
            z = torch.randn_like(x)
        noise = betas.sqrt()[t] * z
        mean = (x - pred_noise * ((1 - alphas[t]) / (1 - alphas_hat[t]).sqrt())) / alphas[t].sqrt()
        return mean + noise

    @torch.no_grad()
    def sample_ddpm(self, n_sample, context):
        # Full ancestral DDPM sampling: T model evaluations starting from noise.
        samples = torch.randn(n_sample, 3, self.image_size[0], self.image_size[1]).to(device)
        for i in range(T, 0, -1):
            # Normalized timestep broadcast to (1, 1, 1, 1) for the model.
            t = torch.tensor([i / T])[:, None, None, None].to(device)
            # z = 0 disables the extra noise on the very last step.
            z = torch.randn_like(samples) if i > 1 else 0
            eps = self.model(samples, t, c=context)
            samples = self.denoise_add_noise(samples, i, eps, z)
        return samples

    def denoise_ddim(self, x, t, t_prev, pred_noise):
        # Deterministic DDIM update (eta = 0): estimate x0, then move it back
        # to noise level t_prev along the predicted-noise direction.
        ab = alphas_hat[t]
        ab_prev = alphas_hat[t_prev]
        x0_pred = ab_prev.sqrt() / ab.sqrt() * (x - (1 - ab).sqrt() * pred_noise)
        dir_xt = (1 - ab_prev).sqrt() * pred_noise
        return x0_pred + dir_xt

    @torch.no_grad()
    def sample_ddim(self, n_sample, context, n=20):
        # Accelerated sampling: only n model evaluations instead of T.
        samples = torch.randn(n_sample, 3, self.image_size[0], self.image_size[1]).to(device)
        step_size = T // n
        for i in range(T, 0, -step_size):
            t = torch.tensor([i / T])[:, None, None, None].to(device)
            eps = self.model(samples, t, c=context)
            prev_i = max(i - step_size, 1)  # never step past the final timestep
            samples = self.denoise_ddim(samples, i, prev_i, eps)
        return samples

    def generate(self, context, mode="ddim"):
        """Generate one (1, 3, 16, 16) sample for a one-hot context vector.

        mode: "ddpm" for full ancestral sampling, anything else uses DDIM.
        """
        ctx = torch.tensor(context).float().unsqueeze(0).to(device)
        if mode == "ddpm":
            return self.sample_ddpm(1, ctx)
        else:
            return self.sample_ddim(1, ctx, n=25)
77
+
78
# -----------------------------
# Gradio Interface
# -----------------------------
import torch.nn.functional as F  # used by run_inference below for upscaling

# Discover available checkpoint files. Sorted so the dropdown order is
# deterministic across platforms (os.listdir order is arbitrary).
weights_folder = "weights"
os.makedirs(weights_folder, exist_ok=True)
available_weights = sorted(f for f in os.listdir(weights_folder) if f.endswith(".pth"))
87
+
88
def run_inference(weights_name, mode, context_choice):
    """Generate one sprite and return it as an (H, W, C) float array in [0, 1].

    Args:
        weights_name: checkpoint filename inside `weights_folder`.
        mode: "ddpm" or "ddim" sampling.
        context_choice: one of the context_map keys below.

    Returns:
        A (256, 256, 3) numpy array suitable for gr.Image display.
    """
    weights_path = os.path.join(weights_folder, weights_name)
    # NOTE: the model is re-created (and weights re-loaded) on every call;
    # acceptable for a demo, cache per weights_name if this becomes hot.
    diffusion = Diffusion(weights_path)

    # One-hot context vectors matching the training label layout.
    context_map = {
        "hero": [1,0,0,0,0],
        "non-hero": [0,1,0,0,0],
        "food": [0,0,1,0,0],
        "spell": [0,0,0,1,0],
        "side-facing": [0,0,0,0,1],
    }
    context = context_map[context_choice]

    samples = diffusion.generate(context=context, mode=mode)

    # Take the first (only) sample, keeping a batch dim for interpolate.
    img = samples[0].unsqueeze(0)  # shape (1, 3, 16, 16)

    # Upscale to 256x256; 'nearest' preserves the blocky pixel-art look.
    img_up = F.interpolate(img, size=(256, 256), mode="nearest")

    img_np = img_up[0].detach().cpu().numpy()
    # Min-max normalize to [0, 1]; guard against a constant image, which
    # would otherwise divide by zero and return NaNs.
    span = img_np.max() - img_np.min()
    if span > 0:
        img_np = (img_np - img_np.min()) / span
    else:
        img_np = np.zeros_like(img_np)
    img_np = np.transpose(img_np, (1,2,0))  # (H, W, C) for display

    return img_np
114
+
115
+
116
# Build and launch the demo UI. Fixes the user-facing typo "algorihm".
with gr.Blocks() as demo:
    gr.Markdown("## Sprite Diffusion Generator 👾")
    gr.Markdown("Note: DDPM algorithm may take around 1-2 minutes.")

    with gr.Row():
        weights_name = gr.Dropdown(available_weights, label="Select weights file")
        mode = gr.Radio(["ddpm", "ddim"], value="ddim", label="Generation Mode")
        context_choice = gr.Dropdown(["hero","non-hero","food","spell","side-facing"], value="hero", label="Context")

    run_btn = gr.Button("Generate")
    output = gr.Image(label="Generated Image")

    # Wire the button to the inference function defined above.
    run_btn.click(run_inference, inputs=[weights_name, mode, context_choice], outputs=output)

demo.launch()
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ torch
2
+ torchvision
3
+ numpy
4
+ matplotlib
5
+ gradio
src/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ from .custom_dataset import SpritesDataset, sprites_transform
2
+ from .model import ContextUnet
3
+ from .model import *
src/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (334 Bytes). View file
 
src/__pycache__/custom_dataset.cpython-312.pyc ADDED
Binary file (2.39 kB). View file
 
src/__pycache__/model.cpython-312.pyc ADDED
Binary file (4.15 kB). View file
 
src/__pycache__/model_parts.cpython-312.pyc ADDED
Binary file (4.48 kB). View file
 
src/custom_dataset.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import torch
3
+ from torch.utils.data import Dataset
4
+ import torchvision.transforms as transforms
5
+
6
class SpritesDataset(Dataset):
    """Sprite images with class labels, both loaded from .npy files.

    When `null_context` is true every item gets label 0 (unconditional
    training); otherwise the stored label for that index is returned.
    """

    def __init__(self, images_path, labels_path, transform, null_context):
        self.images = np.load(images_path, allow_pickle=False)
        self.labels = np.load(labels_path, allow_pickle=False)

        # Shapes are captured once so __getshape__ stays cheap.
        self.images_shape = self.images.shape
        self.labels_shape = self.labels.shape

        self.transform = transform
        self.null_context = null_context

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        image = self.transform(self.images[idx])
        raw_label = 0 if self.null_context else self.labels[idx]
        return image, torch.tensor(raw_label).to(torch.int64)

    def __getshape__(self):
        # Non-standard helper: (images shape, labels shape).
        return self.images_shape, self.labels_shape
32
+
33
# Standard sprite preprocessing: HWC uint8 -> CHW float in [0, 1] via
# ToTensor, then rescaled channel-wise to [-1, 1] by Normalize(0.5, 0.5).
sprites_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5),
                         (0.5, 0.5, 0.5)),
])
38
+
src/generators.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
def denoise_add_noise(x, t, pred_noise, z=None, betas=None, alphas=None, alphas_hat=None):
    """One reverse DDPM step at integer timestep t.

    Removes the model-predicted noise from x, then re-injects fresh noise
    scaled by sqrt(beta_t).

    Args:
        x: current noisy sample tensor.
        t: integer timestep index into the schedule tensors.
        pred_noise: noise predicted by the model for (x, t).
        z: optional noise tensor; defaults to fresh standard Gaussian noise.
        betas, alphas, alphas_hat: diffusion schedule tensors. The original
            code read these (and `torch`) from globals this module never
            defined — calling it always raised NameError — so they are now
            explicit keyword parameters.

    Returns:
        The partially denoised sample for timestep t-1.

    Raises:
        ValueError: if any schedule tensor is missing.
    """
    import torch  # this module has no top-level imports; keep the fix local

    if betas is None or alphas is None or alphas_hat is None:
        raise ValueError("betas, alphas and alphas_hat schedule tensors are required")
    if z is None:
        z = torch.randn_like(x)
    noise = betas.sqrt()[t] * z
    mean = (x - pred_noise * ((1 - alphas[t]) / (1 - alphas_hat[t]).sqrt())) / alphas[t].sqrt()
    return mean + noise
7
+
src/model.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from src.model_parts import ResidualDoubleConv, UpSample, DownSample, EmbedFC
2
+ import torch.nn as nn
3
+ import torch
4
+
5
class ContextUnet(nn.Module):
    """Context-conditioned U-Net noise predictor for diffusion.

    NOTE: the submodule attribute names below (init_conv, down1, ...) are
    load-bearing — they define the state_dict keys, so renaming any of them
    would break the shipped checkpoints.
    """

    def __init__(self, in_channels, features=256, context_features=10, image_size=(16, 16)):
        super(ContextUnet, self).__init__()

        self.in_channels = in_channels
        self.features = features
        self.context_features = context_features
        self.height, self.width = image_size

        # Stem: keeps spatial size, lifts input to `features` channels.
        self.init_conv = ResidualDoubleConv(in_channels, features, is_residual=True)

        # Two encoder stages (each DownSample halves spatial resolution).
        self.down1 = DownSample(features, features)
        self.down2 = DownSample(features, 2*features)

        # Collapse the remaining map to a 1x1 latent vector
        # (assumes 16x16 inputs -> 4x4 after two downsamples — TODO confirm
        # for other image_size values).
        self.to_vec = nn.Sequential(
            nn.AvgPool2d((4)),
            nn.GELU(),
        )

        # Embeddings for the scalar timestep and the context vector,
        # one pair per decoder stage.
        self.timeembed1 = EmbedFC(1, 2*features)
        self.timeembed2 = EmbedFC(1, 1*features)
        self.contextembed1 = EmbedFC(context_features, 2*features)
        self.contextembed2 = EmbedFC(context_features, 1*features)

        # Expand the 1x1 latent back up to (height/4 x height/4).
        self.up0 = nn.Sequential(
            nn.ConvTranspose2d(2*features, 2*features, self.height//4, self.height//4),
            nn.GroupNorm(8, 2*features),
            nn.ReLU(),
        )
        self.up1 = UpSample(4*features, features)
        self.up2 = UpSample(2*features, features)

        # Head: fuse decoder output with the stem features, map back to input channels.
        self.out = nn.Sequential(
            nn.Conv2d(2*features, features, 3, 1, 1),
            nn.GroupNorm(8, features),
            nn.ReLU(),
            nn.Conv2d(features, self.in_channels, 3, 1, 1),
        )

    def forward(self, x, t, c=None):
        # x: noisy image batch; t: normalized timestep; c: optional context
        # of shape (batch, context_features). Returns predicted noise with
        # the same shape as x.

        x = self.init_conv(x)
        down1 = self.down1(x)
        down2 = self.down2(down1)

        hiddenvec = self.to_vec(down2)

        # Missing context becomes an all-zero vector (unconditional pass).
        if c is None:
            c = torch.zeros(x.shape[0], self.context_features).to(x)

        # Context modulates multiplicatively, time shifts additively (below).
        cemb1 = self.contextembed1(c).view(-1, self.features*2, 1, 1)
        temb1 = self.timeembed1(t).view(-1, self.features*2, 1, 1)
        cemb2 = self.contextembed2(c).view(-1, self.features, 1, 1)
        temb2 = self.timeembed2(t).view(-1, self.features, 1, 1)

        up1 = self.up0(hiddenvec)
        up2 = self.up1(cemb1*up1 + temb1, down2)
        up3 = self.up2(cemb2*up2 + temb2, down1)
        out = self.out(torch.cat((up3, x), 1))
        return out
66
+
src/model_parts.py ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import torch
3
+ import torch.nn as nn
4
+
5
class ResidualDoubleConv(nn.Module):
    """Two 3x3 conv layers (each Conv -> BatchNorm -> GELU), optionally
    wrapped in a residual connection.

    Attribute names (`conv`, `shortcut`) are kept — they are state_dict keys.
    """

    def __init__(self, in_channels, out_channels, is_residual=False):
        super().__init__()

        # Main path; 3x3 convs with padding 1 preserve spatial size.
        self.conv = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1, stride=1),
            nn.BatchNorm2d(out_channels),
            nn.GELU(),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1, stride=1),
            nn.BatchNorm2d(out_channels),
            nn.GELU(),
        )

        self.is_same_channels = in_channels == out_channels
        self.is_residual = is_residual

        # A 1x1 projection is only needed when the skip path must change width.
        self.shortcut = (
            nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0, bias=False)
            if is_residual and not self.is_same_channels
            else None
        )

    def forward(self, x):
        out = self.conv(x)
        if not self.is_residual:
            return out
        skip = x if self.is_same_channels else self.shortcut(x)
        # Divide by sqrt(2) to keep the variance of the summed paths stable.
        return (out + skip) / np.sqrt(2)
39
+
40
class UpSample(nn.Module):
    """Decoder stage: concatenate a skip connection, 2x transpose-conv
    upsample, then two ResidualDoubleConv refinements.

    The combined channels of `x` and `skip` must equal `in_channels`.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()

        self.conv = nn.Sequential(
            nn.ConvTranspose2d(in_channels, out_channels, kernel_size=2, stride=2),
            ResidualDoubleConv(out_channels, out_channels),
            ResidualDoubleConv(out_channels, out_channels),
        )

    def forward(self, x, skip):
        merged = torch.cat((x, skip), 1)  # fuse decoder input with encoder skip
        return self.conv(merged)
56
+
57
class DownSample(nn.Module):
    """Encoder stage: two ResidualDoubleConv blocks, then a 2x max-pool."""

    def __init__(self, in_channels, out_channels):
        super().__init__()

        # Residual handling lives inside ResidualDoubleConv itself.
        self.conv = nn.Sequential(
            ResidualDoubleConv(in_channels, out_channels),
            ResidualDoubleConv(out_channels, out_channels),
            nn.MaxPool2d(2),
        )

    def forward(self, x):
        return self.conv(x)
71
+
72
class EmbedFC(nn.Module):
    """Embed a flat `input_dim` vector into `embed_dim` via a two-layer MLP."""

    def __init__(self, input_dim, embed_dim):
        super().__init__()
        self.input_dim = input_dim
        self.fc = nn.Sequential(
            nn.Linear(input_dim, embed_dim),
            nn.GELU(),
            nn.Linear(embed_dim, embed_dim),
        )

    def forward(self, x):
        # Accepts any shape whose total size is a multiple of input_dim.
        flat = x.view(-1, self.input_dim)
        return self.fc(flat)
weights/sprites_model_100.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4955a5d39b625a60f0bd825b15fd2e9fae44b4643211054fa40418395e9a3cd
3
+ size 94376581
weights/sprites_model_150.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db83ef40e458b5079179f4d681e8b18c69b08cd07fca817fc06d88dfbd231349
3
+ size 94376581
weights/sprites_model_199.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19bdd26e7e07e95bcc230298b26564828d6cc3fb2f4ef4fa379b5bdde5a12347
3
+ size 94376581
weights/sprites_model_50.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49cb4b55fb0371b40df841b1be6e2294ccdcf8c2489770b5aed08a0caa2aaf3d
3
+ size 94376422