AYYasaswini committed · verified
Commit bd02577 · Parent: 974b4f9

Update app.py

Files changed (1):
  app.py (+30, −9)
app.py CHANGED
@@ -53,7 +53,6 @@ torch.manual_seed(1)
 logging.set_verbosity_error()
 
 # Set device
-torch_device = "cpu"
 #if "mps" == torch_device: os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = "1"
 
 """## Loading the models
@@ -91,7 +90,7 @@ If all you want is to make a picture with some text, you could ignore this noteb
 
 What we want to do in this notebook is dig a little deeper into how this works, so we'll start by checking that the example code runs. Again, this is adapted from the [HF notebook](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/stable_diffusion.ipynb) and looks very similar to what you'll find if you inspect [the `__call__()` method of the stable diffusion pipeline](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py#L200).
 """
-
+##########################################################################################
 # Some settings
 prompt = ["A watercolor painting of an otter"]
 height = 512 # default height of Stable Diffusion
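The prose in this hunk points readers who only want images at the stock pipeline. For reference, a minimal end-to-end call looks roughly like this (the checkpoint id is an assumption; the app may load a different one):

```python
from diffusers import StableDiffusionPipeline

# Load the full pipeline (checkpoint id assumed for illustration).
pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4").to(torch_device)
image = pipe("A watercolor painting of an otter").images[0]
image.save("otter.png")
```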
@@ -400,7 +399,7 @@ The token is fed to the `token_embedding` to transform it into a vector. The fun
 
 We can look at the embedding layer:
 """
-
+#########################################################################################
 # Access the embedding layer
 token_emb_layer = text_encoder.text_model.embeddings.token_embedding
 token_emb_layer # Vocab size 49408, emb_dim 768
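To make the inspected layer concrete: `token_emb_layer` is an `nn.Embedding(49408, 768)`, so feeding it a token id returns a 768-dimensional vector. A quick check (the token id is arbitrary):

```python
import torch

token_id = torch.tensor([464])         # arbitrary vocab id, for illustration only
embedding = token_emb_layer(token_id)  # look up one row of the (49408, 768) table
print(embedding.shape)                 # torch.Size([1, 768])
```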
@@ -881,7 +880,7 @@ num_inference_steps = 50 #@param # Number of denoising steps
 guidance_scale = 8 #@param # Scale for classifier-free guidance
 generator = torch.manual_seed(0) # Seed generator to create the initial latent noise
 batch_size = 1
-orange_loss_scale = 200 #@param
+blue_loss_scale = 200 #@param
 
 # Prep text
 text_input = tokenizer([prompt], padding="max_length", max_length=tokenizer.model_max_length, truncation=True, return_tensors="pt")
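The scale renamed here multiplies `orange_loss`, whose definition sits outside these hunks. By analogy with the `blue_loss` in the original "Grokking Stable Diffusion" notebook, it is presumably a per-channel color penalty; a plausible sketch (the target RGB values are guesses):

```python
def orange_loss(images):
    # images: (batch, 3, H, W) in range (0, 1); push pixels toward an
    # orange tone of roughly RGB (0.9, 0.6, 0.2) -- values are assumptions.
    error = ((images[:, 0] - 0.9).abs()
             + (images[:, 1] - 0.6).abs()
             + (images[:, 2] - 0.2).abs())
    return error.mean()
```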
@@ -936,7 +935,7 @@ for i, t in tqdm(enumerate(scheduler.timesteps), total=len(scheduler.timesteps))
     denoised_images = vae.decode((1 / 0.18215) * latents_x0).sample / 2 + 0.5 # range (0, 1)
 
     # Calculate loss
-    loss = orange_loss(denoised_images) * orange_loss_scale
+    loss = orange_loss(denoised_images) * blue_loss_scale
 
     # Occasionally print it out
     if i%10==0:
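For context, the scaled loss changed in this hunk steers sampling by gradient descent on the latents. The surrounding loop isn't part of the diff, so this is only a sketch of the usual pattern from the source notebook (`sigma`, `noise_pred`, `vae`, and `latents` come from that loop):

```python
# Inside the denoising loop, every few steps:
latents = latents.detach().requires_grad_()
latents_x0 = latents - sigma * noise_pred                # predict the denoised latents
denoised_images = vae.decode((1 / 0.18215) * latents_x0).sample / 2 + 0.5

loss = orange_loss(denoised_images) * blue_loss_scale    # the line changed in this hunk
grad = torch.autograd.grad(loss, latents)[0]             # d(loss)/d(latents)
latents = latents.detach() - grad * sigma**2             # nudge latents to reduce the loss
```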
@@ -963,7 +962,7 @@ num_inference_steps = 50 #@param # Number of denoising steps
 guidance_scale = 8 #@param # Scale for classifier-free guidance
 generator = torch.manual_seed(77) # Seed generator to create the initial latent noise
 batch_size = 1
-orange_loss_scale = 200 #@param
+blue_loss_scale = 200 #@param
 
 # Prep text
 text_input = tokenizer([prompt], padding="max_length", max_length=tokenizer.model_max_length, truncation=True, return_tensors="pt")
@@ -1018,7 +1017,7 @@ for i, t in tqdm(enumerate(scheduler.timesteps), total=len(scheduler.timesteps))
     denoised_images = vae.decode((1 / 0.18215) * latents_x0).sample / 2 + 0.5 # range (0, 1)
 
     # Calculate loss
-    loss = orange_loss(denoised_images) * orange_loss_scale
+    loss = orange_loss(denoised_images) * blue_loss_scale
 
     # Occasionally print it out
     if i%10==0:
@@ -1045,7 +1044,7 @@ num_inference_steps = 50 #@param # Number of denoising steps
 guidance_scale = 8 #@param # Scale for classifier-free guidance
 generator = torch.manual_seed(42) # Seed generator to create the initial latent noise
 batch_size = 1
-orange_loss_scale = 200 #@param
+blue_loss_scale = 200 #@param
 
 # Prep text
 text_input = tokenizer([prompt], padding="max_length", max_length=tokenizer.model_max_length, truncation=True, return_tensors="pt")
@@ -1135,7 +1134,28 @@ This notebook was written by Jonathan Whitaker, adapted from ['Grokking Stable D
 
 import gradio as gr
 
+def generate_image_from_prompt(text_in, style_in):
+    STYLE_LIST = ['oil_style.bin', 'valorant_style.bin', 'cartoon_syle.bin', 'space_style.bin', 'terraria_syle.bin']
+    STYLE_SEEDS = [128, 64, 128, 64, 128]
+
+    print(text_in)
+    print(style_in)
+    style_file = style_in + '_style.bin'
+    idx = STYLE_LIST.index(style_file)
+    print(style_file)
+    print(idx)
+
+    prompt = text_in
+
+    style_seed = STYLE_SEEDS[idx]
+    style_dict = torch.load(style_file)
+    style_embed = [v for v in style_dict.values()]
+
+    generated_image = embed_style(prompt, style_embed[0], style_seed)
+
+    loss_generated_img = loss_style(prompt, style_embed[0], style_seed)
 
+    return [generated_image, loss_generated_img]
 dict_styles = {'<gartic-phone>':'styles/learned_embeds_gartic-phone.bin',
                '<hawaiian shirt>':'styles/learned_embeds_hawaiian-shirt.bin',
                '<gp>': 'styles/learned_embeds_phone01.bin',
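`embed_style` and `loss_style` are defined elsewhere in app.py. The usual way such helpers consume a learned embedding like `style_embed[0]` is to register a placeholder token and overwrite its row in the text encoder's embedding table; a sketch under that assumption (the function name and token string are hypothetical):

```python
import torch

def load_style_token(tokenizer, text_encoder, style_embed, token="<style>"):
    tokenizer.add_tokens(token)                           # register the placeholder token
    text_encoder.resize_token_embeddings(len(tokenizer))  # grow the embedding table
    token_id = tokenizer.convert_tokens_to_ids(token)
    with torch.no_grad():
        # overwrite the new row with the learned style vector
        text_encoder.get_input_embeddings().weight[token_id] = style_embed
    return token_id
```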
@@ -1147,7 +1167,7 @@ def inference(prompt, style):
 
     if prompt is not None and style is not None:
         style = dict_styles[style]
-        result = generate_with_prompt_style_guidance(prompt, style)
+        result = generate_image_from_prompt(prompt, style)
         return np.array(result)
     else:
        return None
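A hypothetical call mirroring what the Gradio callback receives (the prompt and style key are illustrative; the function is intended to return a numpy array suitable for an image output, or None):

```python
img_array = inference("A watercolor painting of an otter", "<gp>")
```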
@@ -1168,5 +1188,6 @@ demo = gr.Interface(inference,
                     # examples = examples,
                     # cache_examples=True
                     )
+
 demo.launch()
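Only the tail of the `gr.Interface(...)` call appears in this hunk. A minimal construction consistent with it might look like the following (the input and output components are assumptions):

```python
import gradio as gr

demo = gr.Interface(inference,
                    inputs=[gr.Textbox(label="Prompt"),
                            gr.Dropdown(list(dict_styles.keys()), label="Style")],
                    outputs=gr.Image(label="Generated image"))

demo.launch()
```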