pva22 committed
Commit d24c692 · Parent: 3ea7b53

new lora and app

.DS_Store CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
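(.DS_Store is macOS Finder metadata with no effect on the app; such files are usually kept out of commits via .gitignore.)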
 
app.py CHANGED
@@ -57,21 +57,17 @@ def align_embeddings(prompt_embeds, negative_prompt_embeds):
         torch.nn.functional.pad(negative_prompt_embeds, (0, 0, 0, max_length - negative_prompt_embeds.shape[1]))
 
 device = "cuda" if torch.cuda.is_available() else "cpu"
-#model_repo_id = "stabilityai/sdxl-turbo" # Replace to the model you would like to use
+
 model_id_default = "sd-legacy/stable-diffusion-v1-5"
-model_dropdown = ['stabilityai/sdxl-turbo', 'CompVis/stable-diffusion-v1-4', 'sd-legacy/stable-diffusion-v1-5' ]
+model_dropdown = ['stabilityai/sdxl-turbo', 'CompVis/stable-diffusion-v1-4', 'sd-legacy/stable-diffusion-v1-5']
 
 model_lora_default = "lora"
-model_lora_dropdown = ['lora', 'lora']
 
 if torch.cuda.is_available():
     torch_dtype = torch.float16
 else:
     torch_dtype = torch.float32
 
-# pipe = DiffusionPipeline.from_pretrained(model_repo_id, torch_dtype=torch_dtype)
-# pipe = pipe.to(device)
-
 MAX_SEED = np.iinfo(np.int32).max
 MAX_IMAGE_SIZE = 1024
 
@@ -96,10 +92,6 @@ def infer(
     seed = random.randint(0, MAX_SEED)
 
     generator = torch.Generator().manual_seed(seed)
-
-    # remove the unconditional pipe reload
-    #pipe = DiffusionPipeline.from_pretrained(model_repo_id, torch_dtype=torch_dtype)
-    #pipe = pipe.to(device)
 
     # reload the pipe only when a non-default model is selected
     if model_repo_id != model_id_default:
@@ -109,7 +101,6 @@ def infer(
         prompt_embeds, negative_prompt_embeds = align_embeddings(prompt_embeds, negative_prompt_embeds)
     else:
         # attach the LoRA
-        #pipe = get_lora_sd_pipeline(ckpt_dir='./lora_lady_and_cats_logos', base_model_name_or_path=model_id_default, dtype=torch_dtype).to(device)
         pipe = get_lora_sd_pipeline(ckpt_dir='./' + model_lora_id, base_model_name_or_path=model_id_default, dtype=torch_dtype).to(device)
         prompt_embeds = process_prompt(prompt, pipe.tokenizer, pipe.text_encoder)
         negative_prompt_embeds = process_prompt(negative_prompt, pipe.tokenizer, pipe.text_encoder)
@@ -118,19 +109,6 @@ def infer(
     print(f"LoRA scale applied: {lora_scale}")
     pipe.fuse_lora(lora_scale=lora_scale)
 
-
-    # replace the plain prompt-based pipe call
-    #image = pipe(
-    #    prompt=prompt,
-    #    negative_prompt=negative_prompt,
-    #    guidance_scale=guidance_scale,
-    #    num_inference_steps=num_inference_steps,
-    #    width=width,
-    #    height=height,
-    #    generator=generator,
-    #).images[0]
-
-
     # call the pipe with embeddings
     params = {
         'prompt_embeds': prompt_embeds,
@@ -144,17 +122,12 @@ def infer(
 
     return pipe(**params).images[0], seed
 
-    # return image, seed
-
 
 examples = [
-    "Puss in Boots wearing a sombrero crosses the Grand Canyon on a tightrope with a guitar.",
-    "A cat is playing a song called ""About the Cat"" on an accordion by the sea at sunset. The sun is quickly setting behind the horizon, and the light is fading.",
-    "A cat walks through the grass on the streets of an abandoned city. The camera view is always focused on the cat's face.",
-    "A young lady in a Russian embroidered kaftan is sitting on a beautiful carved veranda, holding a cup to her mouth and drinking tea from the cup. With her other hand, the girl holds a saucer. The cup and saucer are painted with gzhel. Next to the girl on the table stands a samovar, and steam can be seen above it.",
-    "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
-    "An astronaut riding a green horse",
-    "A delicious ceviche cheesecake slice",
+    "An Elon Mask lady in a Russian embroidered kaftan is sitting on a beautiful carved veranda, holding a cup to her mouth and drinking tea from the cup. With her other hand, the girl holds a saucer. The cup and saucer are painted with gzhel. Next to the girl on the table stands a samovar, and steam can be seen above it.",
+    "Elon Mask in a jungle, cold color palette, muted colors, detailed, 8k",
+    "An Elon Mask astronaut riding a green horse",
+    "A delicious Elon Mask ceviche cheesecake slice",
 ]
 
 css = """
@@ -166,7 +139,7 @@ css = """
 
 with gr.Blocks(css=css) as demo:
     with gr.Column(elem_id="col-container"):
-        gr.Markdown(" # Text-to-Image SemaSci Template")
+        gr.Markdown(" # Text-to-Image")
 
         with gr.Row():
             prompt = gr.Text(
@@ -181,112 +154,5 @@ with gr.Blocks(css=css) as demo:
 
         result = gr.Image(label="Result", show_label=False)
 
-        with gr.Accordion("Advanced Settings", open=False):
-            # model_repo_id = gr.Text(
-            #     label="Model Id",
-            #     max_lines=1,
-            #     placeholder="Choose model",
-            #     visible=True,
-            #     value=model_repo_id,
-            # )
-            model_repo_id = gr.Dropdown(
-                label="Model Id",
-                choices=model_dropdown,
-                info="Choose model",
-                visible=True,
-                allow_custom_value=True,
-                # value=model_repo_id,
-                value=model_id_default,
-            )
-
-            negative_prompt = gr.Text(
-                label="Negative prompt",
-                max_lines=1,
-                placeholder="Enter a negative prompt",
-                visible=True,
-            )
-
-            seed = gr.Slider(
-                label="Seed",
-                minimum=0,
-                maximum=MAX_SEED,
-                step=1,
-                value=42,
-            )
-
-            randomize_seed = gr.Checkbox(label="Randomize seed", value=False)
-
-            with gr.Row():
-                width = gr.Slider(
-                    label="Width",
-                    minimum=256,
-                    maximum=MAX_IMAGE_SIZE,
-                    step=32,
-                    value=512, # Replace with defaults that work for your model
-                )
-
-                height = gr.Slider(
-                    label="Height",
-                    minimum=256,
-                    maximum=MAX_IMAGE_SIZE,
-                    step=32,
-                    value=512, # Replace with defaults that work for your model
-                )
-
-            with gr.Row():
-                guidance_scale = gr.Slider(
-                    label="Guidance scale",
-                    minimum=0.0,
-                    maximum=10.0,
-                    step=0.1,
-                    value=7.0, # Replace with defaults that work for your model
-                )
-
-                num_inference_steps = gr.Slider(
-                    label="Number of inference steps",
-                    minimum=1,
-                    maximum=50,
-                    step=1,
-                    value=20, # Replace with defaults that work for your model
-                )
-
-            with gr.Row():
-                model_lora_id = gr.Dropdown(
-                    label="Lora Id",
-                    choices=model_lora_dropdown,
-                    info="Choose LoRA model",
-                    visible=True,
-                    allow_custom_value=True,
-                    value=model_lora_default,
-                )
-
-                lora_scale = gr.Slider(
-                    label="LoRA scale",
-                    minimum=0.0,
-                    maximum=1.0,
-                    step=0.1,
-                    value=0.5,
-                )
-
-        gr.Examples(examples=examples, inputs=[prompt])
-    gr.on(
-        triggers=[run_button.click, prompt.submit],
-        fn=infer,
-        inputs=[
-            prompt,
-            negative_prompt,
-            randomize_seed,
-            width,
-            height,
-            model_repo_id,
-            seed,
-            guidance_scale,
-            num_inference_steps,
-            model_lora_id,
-            lora_scale,
-        ],
-        outputs=[result, seed],
-    )
-
 if __name__ == "__main__":
     demo.launch()
 
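For reference, the branch that survives in infer() can be sketched with stock diffusers calls. This is a minimal reconstruction, not the app's code: the real app goes through its own get_lora_sd_pipeline and process_prompt helpers, and load_lora_weights below assumes a diffusers-loadable adapter layout.

import torch
from diffusers import DiffusionPipeline

def build_pipe(model_repo_id, model_id_default, model_lora_id, lora_scale,
               torch_dtype=torch.float32, device="cpu"):
    # Reload only when the user picked a non-default base model.
    if model_repo_id != model_id_default:
        pipe = DiffusionPipeline.from_pretrained(model_repo_id, torch_dtype=torch_dtype)
    else:
        # Default base model: attach the local adapter, then fuse it so the
        # LoRA deltas are merged into the UNet weights at the requested scale.
        pipe = DiffusionPipeline.from_pretrained(model_id_default, torch_dtype=torch_dtype)
        pipe.load_lora_weights('./' + model_lora_id)  # assumes a diffusers-format adapter
        pipe.fuse_lora(lora_scale=lora_scale)
    return pipe.to(device)

Fusing trades flexibility for speed: inference runs with no per-step LoRA overhead, but changing the adapter or the scale means rebuilding the pipeline, which is why the app reconstructs it inside infer().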
lora/.DS_Store CHANGED
Binary files a/lora/.DS_Store and b/lora/.DS_Store differ
 
lora/unet/.DS_Store DELETED
Binary file (6.15 kB)
 
lora/unet/adapter_config.json CHANGED
@@ -27,10 +27,10 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "query",
-    "to_v",
     "to_q",
-    "value"
+    "value",
+    "query",
+    "to_v"
   ],
   "task_type": null,
   "use_dora": false,
 
lora/unet/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:63b44b516a91a81711d3b5950e6a43adc23144a7d4c84bf94198e6c6c59240e7
+oid sha256:abd77708f6b47df2914c974a88d6f15a8f468a2f998446c987314d0c0311e8d2
 size 6397528
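The LFS pointer swaps the object hash while the size stays at 6397528 bytes, which is consistent with a retrained adapter of identical architecture: same tensor shapes, new weight values.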
lora/unet/config.json DELETED
@@ -1,68 +0,0 @@
-{
-  "_class_name": "UNet2DConditionModel",
-  "_diffusers_version": "0.32.2",
-  "_name_or_path": "sd-legacy/stable-diffusion-v1-5",
-  "act_fn": "silu",
-  "addition_embed_type": null,
-  "addition_embed_type_num_heads": 64,
-  "addition_time_embed_dim": null,
-  "attention_head_dim": 8,
-  "attention_type": "default",
-  "block_out_channels": [
-    320,
-    640,
-    1280,
-    1280
-  ],
-  "center_input_sample": false,
-  "class_embed_type": null,
-  "class_embeddings_concat": false,
-  "conv_in_kernel": 3,
-  "conv_out_kernel": 3,
-  "cross_attention_dim": 768,
-  "cross_attention_norm": null,
-  "down_block_types": [
-    "CrossAttnDownBlock2D",
-    "CrossAttnDownBlock2D",
-    "CrossAttnDownBlock2D",
-    "DownBlock2D"
-  ],
-  "downsample_padding": 1,
-  "dropout": 0.0,
-  "dual_cross_attention": false,
-  "encoder_hid_dim": null,
-  "encoder_hid_dim_type": null,
-  "flip_sin_to_cos": true,
-  "freq_shift": 0,
-  "in_channels": 4,
-  "layers_per_block": 2,
-  "mid_block_only_cross_attention": null,
-  "mid_block_scale_factor": 1,
-  "mid_block_type": "UNetMidBlock2DCrossAttn",
-  "norm_eps": 1e-05,
-  "norm_num_groups": 32,
-  "num_attention_heads": null,
-  "num_class_embeds": null,
-  "only_cross_attention": false,
-  "out_channels": 4,
-  "projection_class_embeddings_input_dim": null,
-  "resnet_out_scale_factor": 1.0,
-  "resnet_skip_time_act": false,
-  "resnet_time_scale_shift": "default",
-  "reverse_transformer_layers_per_block": null,
-  "sample_size": 64,
-  "time_cond_proj_dim": null,
-  "time_embedding_act_fn": null,
-  "time_embedding_dim": null,
-  "time_embedding_type": "positional",
-  "timestep_post_act": null,
-  "transformer_layers_per_block": 1,
-  "up_block_types": [
-    "UpBlock2D",
-    "CrossAttnUpBlock2D",
-    "CrossAttnUpBlock2D",
-    "CrossAttnUpBlock2D"
-  ],
-  "upcast_attention": false,
-  "use_linear_projection": false
-}