Spaces:

AYYasaswini
/

Stable_Diffusion_Assignment24

Sleeping

App Files Files Community

AYYasaswini committed on Aug 2, 2024

Commit

bd02577

verified ·

1 Parent(s): 974b4f9

Update app.py

Browse files

Files changed (1) hide show

app.py +30 -9

app.py CHANGED Viewed

@@ -53,7 +53,6 @@ torch.manual_seed(1)
 logging.set_verbosity_error()
 # Set device
-torch_device = "cpu"
 #if "mps" == torch_device: os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = "1"
 """## Loading the models
@@ -91,7 +90,7 @@ If all you want is to make a picture with some text, you could ignore this noteb
 What we want to do in this notebook is dig a little deeper into how this works, so we'll start by checking that the example code runs. Again, this is adapted from the [HF notebook](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/stable_diffusion.ipynb) and looks very similar to what you'll find if you inspect [the `__call__()` method of the stable diffusion pipeline](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py#L200).
 """
 # Some settings
 prompt = ["A watercolor painting of an otter"]
 height = 512                        # default height of Stable Diffusion
@@ -400,7 +399,7 @@ The token is fed to the `token_embedding` to transform it into a vector. The fun
 We can look at the embedding layer:
 """
 # Access the embedding layer
 token_emb_layer = text_encoder.text_model.embeddings.token_embedding
 token_emb_layer # Vocab size 49408, emb_dim 768
@@ -881,7 +880,7 @@ num_inference_steps = 50  #@param           # Number of denoising steps
 guidance_scale = 8 #@param               # Scale for classifier-free guidance
 generator = torch.manual_seed(0)   # Seed generator to create the initial latent noise
 batch_size = 1
-orange_loss_scale = 200 #@param
 # Prep text
 text_input = tokenizer([prompt], padding="max_length", max_length=tokenizer.model_max_length, truncation=True, return_tensors="pt")
@@ -936,7 +935,7 @@ for i, t in tqdm(enumerate(scheduler.timesteps), total=len(scheduler.timesteps))
         denoised_images = vae.decode((1 / 0.18215) * latents_x0).sample / 2 + 0.5 # range (0, 1)
         # Calculate loss
-        loss = orange_loss(denoised_images) * orange_loss_scale
         # Occasionally print it out
         if i%10==0:
@@ -963,7 +962,7 @@ num_inference_steps = 50  #@param           # Number of denoising steps
 guidance_scale = 8 #@param               # Scale for classifier-free guidance
 generator = torch.manual_seed(77)   # Seed generator to create the initial latent noise
 batch_size = 1
-orange_loss_scale = 200 #@param
 # Prep text
 text_input = tokenizer([prompt], padding="max_length", max_length=tokenizer.model_max_length, truncation=True, return_tensors="pt")
@@ -1018,7 +1017,7 @@ for i, t in tqdm(enumerate(scheduler.timesteps), total=len(scheduler.timesteps))
         denoised_images = vae.decode((1 / 0.18215) * latents_x0).sample / 2 + 0.5 # range (0, 1)
         # Calculate loss
-        loss = orange_loss(denoised_images) * orange_loss_scale
         # Occasionally print it out
         if i%10==0:
@@ -1045,7 +1044,7 @@ num_inference_steps = 50  #@param           # Number of denoising steps
 guidance_scale = 8 #@param               # Scale for classifier-free guidance
 generator = torch.manual_seed(42)   # Seed generator to create the initial latent noise
 batch_size = 1
-orange_loss_scale = 200 #@param
 # Prep text
 text_input = tokenizer([prompt], padding="max_length", max_length=tokenizer.model_max_length, truncation=True, return_tensors="pt")
@@ -1135,7 +1134,28 @@ This notebook was written by Jonathan Whitaker, adapted from ['Grokking Stable D
 import gradio as gr
 dict_styles = {'<gartic-phone>':'styles/learned_embeds_gartic-phone.bin',
                '<hawaiian shirt>':'styles/learned_embeds_hawaiian-shirt.bin',
                '<gp>': 'styles/learned_embeds_phone01.bin',
@@ -1147,7 +1167,7 @@ def inference(prompt, style):
     if prompt is not None and style is not None:
         style = dict_styles[style]
-        result = generate_with_prompt_style_guidance(prompt, style)
         return np.array(result)
     else:
         return None
@@ -1168,5 +1188,6 @@ demo = gr.Interface(inference,
                     # examples = examples,
                     # cache_examples=True
                     )
 demo.launch()

 logging.set_verbosity_error()
 # Set device
 #if "mps" == torch_device: os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = "1"
 """## Loading the models
 What we want to do in this notebook is dig a little deeper into how this works, so we'll start by checking that the example code runs. Again, this is adapted from the [HF notebook](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/stable_diffusion.ipynb) and looks very similar to what you'll find if you inspect [the `__call__()` method of the stable diffusion pipeline](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py#L200).
 """
+##########################################################################################
 # Some settings
 prompt = ["A watercolor painting of an otter"]
 height = 512                        # default height of Stable Diffusion
 We can look at the embedding layer:
 """
+#########################################################################################
 # Access the embedding layer
 token_emb_layer = text_encoder.text_model.embeddings.token_embedding
 token_emb_layer # Vocab size 49408, emb_dim 768
 guidance_scale = 8 #@param               # Scale for classifier-free guidance
 generator = torch.manual_seed(0)   # Seed generator to create the initial latent noise
 batch_size = 1
+blue_loss_scale = 200 #@param
 # Prep text
 text_input = tokenizer([prompt], padding="max_length", max_length=tokenizer.model_max_length, truncation=True, return_tensors="pt")
         denoised_images = vae.decode((1 / 0.18215) * latents_x0).sample / 2 + 0.5 # range (0, 1)
         # Calculate loss
+        loss = orange_loss(denoised_images) * blue_loss_scale
         # Occasionally print it out
         if i%10==0:
 guidance_scale = 8 #@param               # Scale for classifier-free guidance
 generator = torch.manual_seed(77)   # Seed generator to create the initial latent noise
 batch_size = 1
+blue_loss_scale = 200 #@param
 # Prep text
 text_input = tokenizer([prompt], padding="max_length", max_length=tokenizer.model_max_length, truncation=True, return_tensors="pt")
         denoised_images = vae.decode((1 / 0.18215) * latents_x0).sample / 2 + 0.5 # range (0, 1)
         # Calculate loss
+        loss = orange_loss(denoised_images) * blue_loss_scale
         # Occasionally print it out
         if i%10==0:
 guidance_scale = 8 #@param               # Scale for classifier-free guidance
 generator = torch.manual_seed(42)   # Seed generator to create the initial latent noise
 batch_size = 1
+blue_loss_scale = 200 #@param
 # Prep text
 text_input = tokenizer([prompt], padding="max_length", max_length=tokenizer.model_max_length, truncation=True, return_tensors="pt")
 import gradio as gr
+def generate_image_from_prompt(text_in, style_in):
+    STYLE_LIST = ['oil_style.bin', 'valorant_style.bin', 'cartoon_syle.bin', 'space_style.bin', 'terraria_syle.bin']
+    STYLE_SEEDS = [128, 64, 128, 64, 128]
+    print(text_in)
+    print(style_in)
+    style_file = style_in + '_style.bin'
+    idx = STYLE_LIST.index(style_file)
+    print(style_file)
+    print(idx)
+    prompt = text_in
+    style_seed = STYLE_SEEDS[idx]
+    style_dict = torch.load(style_file)
+    style_embed = [v for v in style_dict.values()]
+    generated_image = embed_style(prompt, style_embed[0], style_seed)
+    loss_generated_img = (loss_style(prompt, style_embed[0], style_seed))
+    return [generated_image, loss_generated_img]
 dict_styles = {'<gartic-phone>':'styles/learned_embeds_gartic-phone.bin',
                '<hawaiian shirt>':'styles/learned_embeds_hawaiian-shirt.bin',
                '<gp>': 'styles/learned_embeds_phone01.bin',
     if prompt is not None and style is not None:
         style = dict_styles[style]
+        result = generate_image_from_prompt(prompt, style)
         return np.array(result)
     else:
         return None
                     # examples = examples,
                     # cache_examples=True
                     )
 demo.launch()