Spaces:

Blealtan
/

clip-guided-binary-autoencoder

Runtime error

App Files Files Community

Blealtan committed on Feb 15, 2023

Commit

2034096

1 Parent(s): 9bd4927

Refine code and use text instead of file

Browse files

Files changed (1) hide show

app.py +41 -57

app.py CHANGED Viewed

@@ -1,7 +1,9 @@
-from huggingface_hub import hf_hub_url, cached_download
 import streamlit as st
 import io
 import gc
 ########################################################################################################
 # The RWKV Language Model - https://github.com/BlinkDL/RWKV-LM
@@ -20,6 +22,8 @@ from torchvision.transforms import functional as VF
 device = 'cuda' if torch.cuda.is_available() else 'cpu'
 class ToBinary(torch.autograd.Function):
@@ -52,9 +56,8 @@ class ResBlock(nn.Module):
 class REncoderSmall(nn.Module):
-    def __init__(self, args):
         super().__init__()
-        self.args = args
         dd = 8
         self.Bxx = nn.BatchNorm2d(dd * 64)
@@ -80,10 +83,7 @@ class REncoderSmall(nn.Module):
         self.C22 = nn.Conv2d(dd * 64, 256, kernel_size=3, padding=1)
         self.C23 = nn.Conv2d(256, dd * 64, kernel_size=3, padding=1)
-        self.COUT = nn.Conv2d(dd * 64,
-                              args.my_img_bit,
-                              kernel_size=3,
-                              padding=1)
     def forward(self, img):
         ACT = F.mish
@@ -110,14 +110,10 @@ class REncoderSmall(nn.Module):
 class RDecoderSmall(nn.Module):
-    def __init__(self, args):
         super().__init__()
-        self.args = args
         dd = 8
-        self.CIN = nn.Conv2d(args.my_img_bit,
-                             dd * 64,
-                             kernel_size=3,
-                             padding=1)
         self.B00 = nn.BatchNorm2d(dd * 64)
         self.C00 = nn.Conv2d(dd * 64, 256, kernel_size=3, padding=1)
@@ -165,9 +161,8 @@ class RDecoderSmall(nn.Module):
 class REncoderLarge(nn.Module):
-    def __init__(self, args, dd, ee, ff):
         super().__init__()
-        self.args = args
         self.CXX = nn.Conv2d(3, dd, kernel_size=3, padding=1)
         self.BXX = nn.BatchNorm2d(dd)
         self.CX0 = nn.Conv2d(dd, ee, kernel_size=3, padding=1)
@@ -175,10 +170,7 @@ class REncoderLarge(nn.Module):
         self.R0 = ResBlock(dd * 4, ff)
         self.R1 = ResBlock(dd * 16, ff)
         self.R2 = ResBlock(dd * 64, ff)
-        self.CZZ = nn.Conv2d(dd * 64,
-                             args.my_img_bit,
-                             kernel_size=3,
-                             padding=1)
     def forward(self, x):
         ACT = F.mish
@@ -198,13 +190,9 @@ class REncoderLarge(nn.Module):
 class RDecoderLarge(nn.Module):
-    def __init__(self, args, dd, ee, ff):
         super().__init__()
-        self.args = args
-        self.CZZ = nn.Conv2d(args.my_img_bit,
-                             dd * 64,
-                             kernel_size=3,
-                             padding=1)
         self.BZZ = nn.BatchNorm2d(dd * 64)
         self.R0 = ResBlock(dd * 64, ff)
         self.R1 = ResBlock(dd * 16, ff)
@@ -234,32 +222,22 @@ def prepare_model(model_prefix):
     gc.collect()
     if model_prefix == 'out-v7c_d8_256-224-13bit-OB32x0.5-745':
-        R_ENCODER, R_DECODER = REncoderSmall, RDecoderSmall
     else:
         if 'd16_512' in model_prefix:
             dd, ee, ff = 16, 64, 512
         elif 'd32_1024' in model_prefix:
             dd, ee, ff = 32, 128, 1024
-        R_ENCODER, R_DECODER = ((lambda args: REncoderLarge(args, dd, ee, ff)),
-                                (lambda args: RDecoderLarge(args, dd, ee, ff)))
-    args = types.SimpleNamespace()
-    args.my_img_bit = 13
-    encoder = R_ENCODER(args).eval().to(device)
-    decoder = R_DECODER(args).eval().to(device)
-    zpow = torch.tensor([2**i for i in range(0, 13)]).reshape(13, 1, 1)
-    zpow = zpow.to(device).long()
     encoder.load_state_dict(
-        torch.load(
-            cached_download(hf_hub_url(MODEL_REPO, f'{model_prefix}-E.pth'))))
     decoder.load_state_dict(
-        torch.load(
-            cached_download(hf_hub_url(MODEL_REPO, f'{model_prefix}-D.pth'))))
-    encoder.eval()
-    decoder.eval()
     return encoder, decoder
@@ -277,11 +255,23 @@ def encode(model_prefix, img):
         z = encoder(img)
         z = ToBinary.apply(z)
-    return z.cpu().numpy()
-def decode(model_prefix, z):
     _, decoder = prepare_model(model_prefix)
     decoded = decoder(torch.Tensor(z).to(device))
     return VF.to_pil_image(decoded[0])
@@ -300,20 +290,14 @@ with encoder_tab:
     if uploaded_file is not None:
         image = Image.open(uploaded_file)
         col_in.image(image, 'Input Image')
-        z = encode(model_prefix, image)
-        with io.BytesIO() as buffer:
-            np.save(buffer, z)
-            col_out.download_button(
-                label="Download Encoded Data",
-                data=buffer,
-                file_name=uploaded_file.name + '.npy',
-            )
-        col_out.image(decode(model_prefix, z), 'Output Image preview')
 with decoder_tab:
     col_in, col_out = st.columns(2)
-    uploaded_file = col_in.file_uploader('Choose an Encoded Data')
-    if uploaded_file is not None:
-        z = np.load(uploaded_file)
-        image = decode(model_prefix, z)
         col_out.image(image, 'Output Image')

+import base64
+from huggingface_hub import hf_hub_download
 import streamlit as st
 import io
 import gc
+import json
 ########################################################################################################
 # The RWKV Language Model - https://github.com/BlinkDL/RWKV-LM
 device = 'cuda' if torch.cuda.is_available() else 'cpu'
+IMG_BITS = 13
 class ToBinary(torch.autograd.Function):
 class REncoderSmall(nn.Module):
+    def __init__(self):
         super().__init__()
         dd = 8
         self.Bxx = nn.BatchNorm2d(dd * 64)
         self.C22 = nn.Conv2d(dd * 64, 256, kernel_size=3, padding=1)
         self.C23 = nn.Conv2d(256, dd * 64, kernel_size=3, padding=1)
+        self.COUT = nn.Conv2d(dd * 64, IMG_BITS, kernel_size=3, padding=1)
     def forward(self, img):
         ACT = F.mish
 class RDecoderSmall(nn.Module):
+    def __init__(self):
         super().__init__()
         dd = 8
+        self.CIN = nn.Conv2d(IMG_BITS, dd * 64, kernel_size=3, padding=1)
         self.B00 = nn.BatchNorm2d(dd * 64)
         self.C00 = nn.Conv2d(dd * 64, 256, kernel_size=3, padding=1)
 class REncoderLarge(nn.Module):
+    def __init__(self, dd, ee, ff):
         super().__init__()
         self.CXX = nn.Conv2d(3, dd, kernel_size=3, padding=1)
         self.BXX = nn.BatchNorm2d(dd)
         self.CX0 = nn.Conv2d(dd, ee, kernel_size=3, padding=1)
         self.R0 = ResBlock(dd * 4, ff)
         self.R1 = ResBlock(dd * 16, ff)
         self.R2 = ResBlock(dd * 64, ff)
+        self.CZZ = nn.Conv2d(dd * 64, IMG_BITS, kernel_size=3, padding=1)
     def forward(self, x):
         ACT = F.mish
 class RDecoderLarge(nn.Module):
+    def __init__(self, dd, ee, ff):
         super().__init__()
+        self.CZZ = nn.Conv2d(IMG_BITS, dd * 64, kernel_size=3, padding=1)
         self.BZZ = nn.BatchNorm2d(dd * 64)
         self.R0 = ResBlock(dd * 64, ff)
         self.R1 = ResBlock(dd * 16, ff)
     gc.collect()
     if model_prefix == 'out-v7c_d8_256-224-13bit-OB32x0.5-745':
+        R_ENCODER, R_DECODER = REncoderSmall(), RDecoderSmall()
     else:
         if 'd16_512' in model_prefix:
             dd, ee, ff = 16, 64, 512
         elif 'd32_1024' in model_prefix:
             dd, ee, ff = 32, 128, 1024
+        R_ENCODER = REncoderLarge(dd, ee, ff)
+        R_DECODER = RDecoderLarge(dd, ee, ff)
+    encoder = R_ENCODER.eval().to(device)
+    decoder = R_DECODER.eval().to(device)
     encoder.load_state_dict(
+        torch.load(hf_hub_download(MODEL_REPO, f'{model_prefix}-E.pth')))
     decoder.load_state_dict(
+        torch.load(hf_hub_download(MODEL_REPO, f'{model_prefix}-D.pth')))
     return encoder, decoder
         z = encoder(img)
         z = ToBinary.apply(z)
+    with io.BytesIO() as buffer:
+        np.save(buffer, np.packbits(z.cpu().numpy().astype('bool')))
+        z_b64 = base64.b64encode(buffer.getvalue()).decode()
+    return json.dumps({"shape": list(z.shape), "data": z_b64})
+def decode(model_prefix, z_str):
     _, decoder = prepare_model(model_prefix)
+    z_json = json.loads(z_str)
+    with io.BytesIO() as buffer:
+        buffer.write(base64.b64decode(z_json["data"]))
+        buffer.seek(0)
+        z = np.load(buffer)
+    z = np.unpackbits(z).astype('float').reshape(z_json["shape"])
     decoded = decoder(torch.Tensor(z).to(device))
     return VF.to_pil_image(decoded[0])
     if uploaded_file is not None:
         image = Image.open(uploaded_file)
         col_in.image(image, 'Input Image')
+        z_str = encode(model_prefix, image)
+        col_out.write("Encoded to:")
+        col_out.code(z_str,language=None)
+        col_out.image(decode(model_prefix, z_str), 'Output Image preview')
 with decoder_tab:
     col_in, col_out = st.columns(2)
+    z_str = col_in.text_area('Paste encoded string here:')
+    if len(z_str) > 0:
+        image = decode(model_prefix, z_str)
         col_out.image(image, 'Output Image')