manbeast3b
/

disburbedPanda2

Model card Files Files and versions

xet

Community

manbeast3b committed on Dec 10, 2024

Commit

026d1ee

verified ·

1 Parent(s): c73798e

Update src/model.py

Browse files

Files changed (1) hide show

src/model.py +88 -42

src/model.py CHANGED Viewed

@@ -1,46 +1,92 @@
-import torch as T, torch.nn as n, torch.nn.functional as f
-def C(v, w, **k): return n.Conv2d(v, w, 3, padding=1, **k)
-class Z(n.Module):
-    def forward(s, x): return T.tanh(x / 3) * 3
-class A(n.Module):
-    def __init__(s, i, o):
         super().__init__()
-        s.a = n.Sequential(C(i, o), n.ReLU(), C(o, o), n.ReLU(), C(o, o))
-        s.b = n.Conv2d(i, o, 1, bias=False) if i != o else n.Identity()
-        s.c = n.ReLU()
-    def forward(s, x): return s.c(s.a(x) + s.b(x))
-def E(c=4):
-    return n.Sequential(
-        C(3, 64), A(64, 64),
-        C(64, 64, stride=2, bias=False), A(64, 64), A(64, 64), A(64, 64),
-        C(64, 64, stride=2, bias=False), A(64, 64), A(64, 64), A(64, 64),
-        C(64, 64, stride=2, bias=False), A(64, 64), A(64, 64), A(64, 64),
-        C(64, c))
-def D(c=4):
-    return n.Sequential(
-        Z(), C(c, 64), n.ReLU(),
-        A(64, 64), n.Upsample(scale_factor=2), C(64, 64, bias=False), n.ReLU(),
-        A(64, 64), n.Upsample(scale_factor=2), C(64, 64, bias=False), n.ReLU(),
-        A(64, 64), n.Upsample(scale_factor=2), C(64, 64, bias=False), n.ReLU(),
-        A(64, 64), C(64, 3))
-class F(n.Module):
-    M, N = 3, 0.5
-    def __init__(s, p1="a.pth", p2="b.pth", c=None):
         super().__init__()
-        c = c or s.H(str(p1))
-        s.a, s.b = E(c), D(c)
-        if p1: s.L(s.a, p1, 'encoder')
-        if p2: s.L(s.b, p2, 'decoder')
-        s.a.requires_grad_(False), s.b.requires_grad_(False)
     @staticmethod
-    def S(x): return x.div(2 * F.M).add(F.N).clamp(0, 1)
     @staticmethod
-    def U(x): return x.sub(F.N).mul(2 * F.M)
-    def L(s, m, p, q):
-        sd = {k.strip(f"{q}."): v for k, v in T.load(p, map_location="cpu", weights_only=True).items() if k.strip(f"{q}.") in m.state_dict() and v.size() == m.state_dict()[k.strip(f"{q}.")].size()}
-        # print(f" {len(sd)} filtered keys for {q}, total: {len(m.state_dict())}")
-        m.load_state_dict(sd, strict=False)
-    def forward(s, x, r=False):
-        l = s.a(x)
-        o = s.b(l)
-        return (o.clamp(0, 1), l) if r else o.clamp(0, 1)

+import torch
+import torch as th
+import torch.nn as nn
+import torch.nn.functional as F
def conv(n_in, n_out, **kwargs):
    """Build a 3x3 ``nn.Conv2d`` with padding 1.

    Args:
        n_in: Number of input channels.
        n_out: Number of output channels.
        **kwargs: Forwarded unchanged to ``nn.Conv2d`` (e.g. ``stride``,
            ``bias``).

    Returns:
        The constructed ``nn.Conv2d`` layer.
    """
    layer = nn.Conv2d(n_in, n_out, kernel_size=3, padding=1, **kwargs)
    return layer
class Clamp(nn.Module):
    """Smoothly bound activations using a scaled ``tanh``."""

    def forward(self, x):
        """Return ``3 * tanh(x / 3)`` computed element-wise on ``x``."""
        squashed = torch.tanh(x.div(3))
        return squashed.mul(3)
class Block(nn.Module):
    """Residual block: three 3x3 convolutions plus a skip path, then ReLU.

    The skip path is an identity when ``n_in == n_out`` and a bias-free 1x1
    convolution otherwise, so both branches produce ``n_out`` channels.
    """

    def __init__(self, n_in, n_out):
        """Create the convolution stack, the skip path and the final ReLU.

        Args:
            n_in: Number of input channels.
            n_out: Number of output channels.
        """
        super().__init__()
        self.conv = nn.Sequential(
            conv(n_in, n_out),
            nn.ReLU(),
            conv(n_out, n_out),
            nn.ReLU(),
            conv(n_out, n_out),
        )
        if n_in == n_out:
            self.skip = nn.Identity()
        else:
            # Channel counts differ: project the input with a 1x1 convolution.
            self.skip = nn.Conv2d(n_in, n_out, 1, bias=False)
        self.fuse = nn.ReLU()

    def forward(self, x):
        """Return ``ReLU(conv(x) + skip(x))``."""
        residual = self.conv(x)
        shortcut = self.skip(x)
        return self.fuse(residual + shortcut)
def Encoder(latent_channels=4):
    """Build the encoder as an ``nn.Sequential``.

    Layout: a 3->64 convolution and one block, then three stages each made of
    a stride-2 bias-free convolution followed by three 64-channel blocks, and
    finally a convolution down to ``latent_channels``.

    Args:
        latent_channels: Number of channels produced by the last convolution.

    Returns:
        The assembled ``nn.Sequential``.
    """
    width = 64
    layers = [conv(3, width), Block(width, width)]
    for _ in range(3):
        # Each stage starts with the stride-2 convolution.
        layers.append(conv(width, width, stride=2, bias=False))
        for _ in range(3):
            layers.append(Block(width, width))
    layers.append(conv(width, latent_channels))
    return nn.Sequential(*layers)
def Decoder(latent_channels=16):
    """Build the 48-channel decoder as an ``nn.Sequential``.

    Layout: ``Clamp``, a convolution from ``latent_channels`` to 48 and a
    ReLU; then three stages of blocks followed by a 2x ``nn.Upsample`` and a
    bias-free convolution (two, two and one block respectively); then one more
    block and a final convolution to 3 output channels.

    Args:
        latent_channels: Number of channels of the input latent.

    Returns:
        The assembled ``nn.Sequential``.
    """
    width = 48
    layers = [Clamp(), conv(latent_channels, width), nn.ReLU()]
    # Number of blocks placed before each of the three upsampling steps.
    for n_blocks in (2, 2, 1):
        for _ in range(n_blocks):
            layers.append(Block(width, width))
        layers.append(nn.Upsample(scale_factor=2))
        layers.append(conv(width, width, bias=False))
    layers.append(Block(width, width))
    layers.append(conv(width, 3))  # final convolution to the output channels
    return nn.Sequential(*layers)
class Model(nn.Module):
    """Frozen encoder/decoder pair with tolerant checkpoint loading.

    The encoder and decoder are built for ``latent_channels`` and, when a path
    is given, initialised from the corresponding checkpoint. Only checkpoint
    entries whose name exists in the module and whose tensor size matches are
    loaded; everything else keeps its freshly-initialised value. Both
    sub-modules have ``requires_grad`` disabled after loading.
    """

    # Constants used by scale_latents / unscale_latents.
    latent_magnitude = 3
    latent_shift = 0.5

    def __init__(self, encoder_path="encoder.pth", decoder_path="decoder.pth", latent_channels=None):
        """Build both sub-modules and load whatever weights fit.

        Args:
            encoder_path: Checkpoint for the encoder, or ``None`` to skip
                loading.
            decoder_path: Checkpoint for the decoder, or ``None`` to skip
                loading.
            latent_channels: Latent channel count; when ``None`` it is guessed
                from ``encoder_path`` via ``guess_latent_channels``.
        """
        super().__init__()
        if latent_channels is None:
            latent_channels = self.guess_latent_channels(str(encoder_path))
        self.encoder = Encoder(latent_channels)
        self.decoder = Decoder(latent_channels)
        if encoder_path is not None:
            self._load_filtered(self.encoder, encoder_path, "encoder")
        if decoder_path is not None:
            self._load_filtered(self.decoder, decoder_path, "decoder")
        self.encoder.requires_grad_(False)
        self.decoder.requires_grad_(False)

    @staticmethod
    def _strip_prefix(key, prefix):
        """Return ``key`` without a leading ``prefix`` (unchanged if absent).

        ``str.strip(prefix)`` is not used because it removes any of the
        prefix's *characters* from both ends of the key, which can mangle
        parameter names.
        """
        if key.startswith(prefix):
            return key[len(prefix):]
        return key

    @staticmethod
    def _filter_state_dict(loaded, target, prefix):
        """Select the entries of ``loaded`` that can be copied into ``target``.

        Args:
            loaded: Mapping of checkpoint names to tensors.
            target: Mapping of the module's own names to tensors.
            prefix: Optional leading prefix to drop from checkpoint names.

        Returns:
            Dict keyed by de-prefixed name, restricted to names present in
            ``target`` with an identical tensor size.
        """
        filtered = {}
        for key, tensor in loaded.items():
            name = Model._strip_prefix(key, prefix)
            if name in target and tensor.size() == target[name].size():
                filtered[name] = tensor
        return filtered

    @staticmethod
    def _load_filtered(module, path, name):
        """Load the compatible subset of the checkpoint at ``path`` into ``module``.

        ``name`` is both the checkpoint key prefix (``"<name>."``) and the
        label used in the diagnostic message.
        """
        loaded = torch.load(path, map_location="cpu", weights_only=True)
        # Snapshot once instead of rebuilding the state dict for every key.
        target = module.state_dict()
        filtered = Model._filter_state_dict(loaded, target, f"{name}.")
        print(f" num of keys in filtered: {len(filtered)} and in {name}: {len(target)}")
        # strict=False: entries that were filtered out are allowed to be missing.
        module.load_state_dict(filtered, strict=False)

    def guess_latent_channels(self, encoder_path):
        """Return 16 if the path mentions ``taef1`` or ``taesd3``, else 4."""
        if "taef1" in encoder_path or "taesd3" in encoder_path:
            return 16
        return 4

    @staticmethod
    def scale_latents(x):
        """Map ``x`` to ``x / (2 * latent_magnitude) + latent_shift``, clamped to [0, 1]."""
        return x.div(2 * Model.latent_magnitude).add(Model.latent_shift).clamp(0, 1)

    @staticmethod
    def unscale_latents(x):
        """Apply ``(x - latent_shift) * (2 * latent_magnitude)``, the inverse of the unclamped scaling."""
        return x.sub(Model.latent_shift).mul(2 * Model.latent_magnitude)

    def forward(self, x, return_latent=False):
        """Encode ``x``, decode the latent and clamp the result to [0, 1].

        Args:
            x: Input passed to the encoder.
            return_latent: When true, also return the encoder output.

        Returns:
            The clamped decoder output, or ``(output, latent)`` when
            ``return_latent`` is true.
        """
        latent = self.encoder(x)
        out = self.decoder(latent).clamp(0, 1)
        if return_latent:
            return out, latent
        return out