Spaces:

rynmurdock
/

generative_recsys

Runtime error

App Files Files Community

rynmurdock committed on Apr 12, 2024

Commit

6f68207

verified ·

1 Parent(s): 1146833

May be faster; will be different qualitatively; may revert

Browse files

Files changed (1) hide show

app.py +59 -34

app.py CHANGED Viewed

@@ -1,4 +1,4 @@
-DEVICE = 'cpu'
 import gradio as gr
 import numpy as np
@@ -6,11 +6,9 @@ from sklearn.svm import LinearSVC
 from sklearn import preprocessing
 import pandas as pd
-from diffusers import LCMScheduler, AutoencoderTiny, EulerDiscreteScheduler, UNet2DConditionModel
 from diffusers.models import ImageProjection
-from patch_sdxl import SDEmb
 import torch
-import spaces
 import random
 import time
@@ -22,8 +20,10 @@ from PIL import Image
 import requests
 from io import BytesIO, StringIO
 from huggingface_hub import hf_hub_download
 from safetensors.torch import load_file
 prompt_list = [p for p in list(set(
                 pd.read_csv('./twitter_prompts.csv').iloc[:, 1].tolist())) if type(p) == str]
@@ -36,11 +36,17 @@ sdxl_lightening = "ByteDance/SDXL-Lightning"
 ckpt = "sdxl_lightning_2step_unet.safetensors"
 unet = UNet2DConditionModel.from_config(model_id, subfolder="unet").to("cuda", torch.float16)
 unet.load_state_dict(load_file(hf_hub_download(sdxl_lightening, ckpt), device="cuda"))
-pipe = SDEmb.from_pretrained(model_id, unet=unet, torch_dtype=torch.float16, variant="fp16").to("cuda")
 pipe.vae = AutoencoderTiny.from_pretrained("madebyollin/taesdxl", torch_dtype=torch.float16)
 pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config, timestep_spacing="trailing")
 pipe.to(device='cuda')
-pipe.load_ip_adapter("h94/IP-Adapter", subfolder="sdxl_models", weight_name="ip-adapter_sdxl.bin")
 output_hidden_state = False
 #######################
@@ -54,14 +60,27 @@ def predict(
     """Run a single prediction on the model"""
     with torch.no_grad():
         if im_emb == None:
-            im_emb = torch.zeros(1, 1280, dtype=torch.float16, device='cuda')
-        image = pipe(
-            prompt=prompt,
-            ip_adapter_emb=im_emb.to('cuda'),
-            height=1024,
-            width=1024,
-            num_inference_steps=2,
-            guidance_scale=0,
             ).images[0]
         im_emb, _ = pipe.encode_image(
                 image, 'cuda', 1, output_hidden_state
@@ -77,8 +96,8 @@ def next_image(embs, ys, calibrate_prompts):
     # handle case where every instance of calibration prompts is 'Neither' or 'Like' or 'Dislike'
     if len(calibrate_prompts) == 0 and len(list(set(ys))) <= 1:
-        embs.append(.01*torch.randn(1, 1280))
-        embs.append(.01*torch.randn(1, 1280))
         ys.append(0)
         ys.append(1)
@@ -92,35 +111,41 @@ def next_image(embs, ys, calibrate_prompts):
             return image, embs, ys, calibrate_prompts
         else:
             print('######### Roaming #########')
-            # sample only as many negatives as there are positives
-            indices = range(len(ys))
-            pos_indices = [i for i in indices if ys[i] == 1]
-            neg_indices = [i for i in indices if ys[i] == 0]
-            lower = min(len(pos_indices), len(neg_indices))
-            neg_indices = random.sample(neg_indices, lower)
-            pos_indices = random.sample(pos_indices, lower)
-            cut_embs = [embs[i] for i in neg_indices] + [embs[i] for i in pos_indices]
-            cut_ys = [ys[i] for i in neg_indices] + [ys[i] for i in pos_indices]
-            feature_embs = torch.stack([e[0].detach().cpu() for e in cut_embs])
             scaler = preprocessing.StandardScaler().fit(feature_embs)
             feature_embs = scaler.transform(feature_embs)
-            print(np.array(feature_embs).shape, np.array(ys).shape)
-            lin_class = LinearSVC(max_iter=50000, dual='auto', class_weight='balanced').fit(np.array(feature_embs), np.array(cut_ys))
             lin_class.coef_ = torch.tensor(lin_class.coef_, dtype=torch.double)
             lin_class.coef_ = (lin_class.coef_.flatten() / (lin_class.coef_.flatten().norm())).unsqueeze(0)
             rng_prompt = random.choice(prompt_list)
             w = 1# if len(embs) % 2 == 0 else 0
             im_emb = w * lin_class.coef_.to(device=DEVICE, dtype=torch.float16)
-            prompt= 'an image' if glob_idx % 2 == 0 else rng_prompt
-            print(prompt)
             image, im_emb = predict(prompt, im_emb)
             embs.append(im_emb)
             return image, embs, ys, calibrate_prompts

+DEVICE = 'cuda'
 import gradio as gr
 import numpy as np
 from sklearn import preprocessing
 import pandas as pd
+from diffusers import LCMScheduler, AutoencoderTiny, EulerDiscreteScheduler, UNet2DConditionModel, AutoPipelineForText2Image
 from diffusers.models import ImageProjection
 import torch
 import random
 import time
 import requests
 from io import BytesIO, StringIO
+from transformers import CLIPVisionModelWithProjection
 from huggingface_hub import hf_hub_download
 from safetensors.torch import load_file
+import spaces
 prompt_list = [p for p in list(set(
                 pd.read_csv('./twitter_prompts.csv').iloc[:, 1].tolist())) if type(p) == str]
 ckpt = "sdxl_lightning_2step_unet.safetensors"
 unet = UNet2DConditionModel.from_config(model_id, subfolder="unet").to("cuda", torch.float16)
 unet.load_state_dict(load_file(hf_hub_download(sdxl_lightening, ckpt), device="cuda"))
+image_encoder = CLIPVisionModelWithProjection.from_pretrained("h94/IP-Adapter",  subfolder="models/image_encoder", torch_dtype=torch.float16,).to("cuda")
+pipe = AutoPipelineForText2Image.from_pretrained(model_id, unet=unet, torch_dtype=torch.float16, variant="fp16", image_encoder=image_encoder).to("cuda")
+pipe.unet._load_ip_adapter_weights(torch.load(hf_hub_download('h94/IP-Adapter', 'sdxl_models/ip-adapter_sdxl_vit-h.bin')))
+pipe.load_ip_adapter("h94/IP-Adapter", subfolder="sdxl_models", weight_name="ip-adapter_sdxl_vit-h.bin")
+pipe.register_modules(image_encoder = image_encoder)
 pipe.vae = AutoencoderTiny.from_pretrained("madebyollin/taesdxl", torch_dtype=torch.float16)
 pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config, timestep_spacing="trailing")
 pipe.to(device='cuda')
 output_hidden_state = False
 #######################
     """Run a single prediction on the model"""
     with torch.no_grad():
         if im_emb == None:
+            im_emb = torch.zeros(1, 1024, dtype=torch.float16, device='cuda')
+        im_emb = [im_emb.to('cuda').unsqueeze(0)]
+        if prompt == '':
+            image = pipe(
+                prompt_embeds=torch.zeros(1, 1, 2048, dtype=torch.float16, device='cuda'),
+                pooled_prompt_embeds=torch.zeros(1, 1280, dtype=torch.float16, device='cuda'),
+                ip_adapter_image_embeds=im_emb,
+                height=1024,
+                width=1024,
+                num_inference_steps=2,
+                guidance_scale=0,
+            ).images[0]
+        else:
+            image = pipe(
+                prompt=prompt,
+                ip_adapter_image_embeds=im_emb,
+                height=1024,
+                width=1024,
+                num_inference_steps=2,
+                guidance_scale=0,
             ).images[0]
         im_emb, _ = pipe.encode_image(
                 image, 'cuda', 1, output_hidden_state
     # handle case where every instance of calibration prompts is 'Neither' or 'Like' or 'Dislike'
     if len(calibrate_prompts) == 0 and len(list(set(ys))) <= 1:
+        embs.append(.01*torch.randn(1, 1024))
+        embs.append(.01*torch.randn(1, 1024))
         ys.append(0)
         ys.append(1)
             return image, embs, ys, calibrate_prompts
         else:
             print('######### Roaming #########')
+            # sample a .8 of rated embeddings for some stochasticity, or at least two embeddings.
+            n_to_choose = max(int(len(embs)*.8), 2)
+            indices = random.sample(range(len(embs)), n_to_choose)
+            # also add the latest 0 and the latest 1
+            has_0 = False
+            has_1 = False
+            for i in reversed(range(len(ys))):
+                if ys[i] == 0 and has_0 == False:
+                    indices.append(i)
+                    has_0 = True
+                elif ys[i] == 1 and has_1 == False:
+                    indices.append(i)
+                    has_1 = True
+                if has_0 and has_1:
+                    break
+            feature_embs = np.array(torch.cat([embs[i] for i in indices]).to('cpu'))
             scaler = preprocessing.StandardScaler().fit(feature_embs)
             feature_embs = scaler.transform(feature_embs)
+            lin_class = LinearSVC(max_iter=50000, dual='auto', class_weight='balanced').fit(feature_embs, np.array([ys[i] for i in indices]))
             lin_class.coef_ = torch.tensor(lin_class.coef_, dtype=torch.double)
             lin_class.coef_ = (lin_class.coef_.flatten() / (lin_class.coef_.flatten().norm())).unsqueeze(0)
             rng_prompt = random.choice(prompt_list)
             w = 1# if len(embs) % 2 == 0 else 0
             im_emb = w * lin_class.coef_.to(device=DEVICE, dtype=torch.float16)
+            prompt= '' if glob_idx % 2 == 0 else rng_prompt
+            print(prompt, len(ys))
             image, im_emb = predict(prompt, im_emb)
             embs.append(im_emb)
+            if len(embs) > 100:
+                embs.pop(0)
+                ys.pop(0)
             return image, embs, ys, calibrate_prompts