Update app.py
Browse files
app.py
CHANGED
|
@@ -86,11 +86,11 @@ pipe = StableDiffusion3Pipeline.from_pretrained(
|
|
| 86 |
vae=None,
|
| 87 |
#vae=AutoencoderKL.from_pretrained("ford442/stable-diffusion-3.5-large-fp32", use_safetensors=True, subfolder='vae',token=True),
|
| 88 |
#scheduler = FlowMatchHeunDiscreteScheduler.from_pretrained('ford442/stable-diffusion-3.5-large-bf16', subfolder='scheduler',token=True),
|
| 89 |
-
text_encoder=None, #CLIPTextModelWithProjection.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder', token=True),
|
| 90 |
# text_encoder=CLIPTextModelWithProjection.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder', token=True),
|
| 91 |
-
text_encoder_2=None, #CLIPTextModelWithProjection.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_2',token=True),
|
| 92 |
# text_encoder_2=CLIPTextModelWithProjection.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_2',token=True),
|
| 93 |
-
text_encoder_3=None, #T5EncoderModel.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_3',token=True),
|
| 94 |
# text_encoder_3=T5EncoderModel.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_3',token=True),
|
| 95 |
#tokenizer=CLIPTokenizer.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", add_prefix_space=True, subfolder="tokenizer", token=True),
|
| 96 |
#tokenizer_2=CLIPTokenizer.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", add_prefix_space=True, subfolder="tokenizer_2", token=True),
|
|
@@ -99,17 +99,17 @@ pipe = StableDiffusion3Pipeline.from_pretrained(
|
|
| 99 |
#torch_dtype=torch.bfloat16,
|
| 100 |
use_safetensors=True,
|
| 101 |
)
|
| 102 |
-
text_encoder=CLIPTextModelWithProjection.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder', token=True).to(torch.device("cuda:0"), dtype=torch.bfloat16)
|
| 103 |
-
text_encoder_2=CLIPTextModelWithProjection.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_2',token=True).to(torch.device("cuda:0"), dtype=torch.bfloat16)
|
| 104 |
-
text_encoder_3=T5EncoderModel.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_3',token=True).to(torch.device("cuda:0"), dtype=torch.bfloat16)
|
| 105 |
ll_transformer=SD3Transformer2DModel.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='transformer',token=True).to(torch.device("cuda:0"), dtype=torch.bfloat16)
|
| 106 |
-
pipe.transformer=ll_transformer
|
| 107 |
pipe.load_lora_weights("ford442/sdxl-vae-bf16", weight_name="LoRA/UltraReal.safetensors")
|
| 108 |
|
| 109 |
#pipe.to(accelerator.device)
|
| 110 |
pipe.to(device=device, dtype=torch.bfloat16)
|
| 111 |
|
| 112 |
-
upscaler_2 = UpscaleWithModel.from_pretrained("Kim2091/ClearRealityV1").to(torch.device('cpu'))
|
| 113 |
|
| 114 |
MAX_SEED = np.iinfo(np.int32).max
|
| 115 |
|
|
@@ -127,11 +127,6 @@ def infer_60(
|
|
| 127 |
num_inference_steps,
|
| 128 |
progress=gr.Progress(track_tqdm=True),
|
| 129 |
):
|
| 130 |
-
pipe.vae=vaeX.to('cpu')
|
| 131 |
-
pipe.config.transformer=ll_transformer
|
| 132 |
-
pipe.config.text_encoder=text_encoder #CLIPTextModelWithProjection.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder', token=True).to(device=device, dtype=torch.bfloat16)
|
| 133 |
-
pipe.config.text_encoder_2=text_encoder_2 #CLIPTextModelWithProjection.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_2',token=True).to(device=device, dtype=torch.bfloat16)
|
| 134 |
-
pipe.config.text_encoder_3=text_encoder_3 #T5EncoderModel.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_3',token=True).to(device=device, dtype=torch.bfloat16)
|
| 135 |
seed = random.randint(0, MAX_SEED)
|
| 136 |
generator = torch.Generator(device='cuda').manual_seed(seed)
|
| 137 |
print('-- generating image --')
|
|
@@ -154,7 +149,6 @@ def infer_60(
|
|
| 154 |
sd35_path = f"sd35ll_{timestamp}.png"
|
| 155 |
sd_image.save(sd35_path,optimize=False,compress_level=0)
|
| 156 |
pyx.upload_to_ftp(sd35_path)
|
| 157 |
-
upscaler_2.to(torch.device('cuda'))
|
| 158 |
with torch.no_grad():
|
| 159 |
upscale = upscaler_2(sd_image, tiling=True, tile_width=256, tile_height=256)
|
| 160 |
upscale2 = upscaler_2(upscale, tiling=True, tile_width=256, tile_height=256)
|
|
@@ -177,11 +171,6 @@ def infer_90(
|
|
| 177 |
num_inference_steps,
|
| 178 |
progress=gr.Progress(track_tqdm=True),
|
| 179 |
):
|
| 180 |
-
pipe.vae=vaeX.to('cpu')
|
| 181 |
-
pipe.config.transformer=ll_transformer
|
| 182 |
-
pipe.config.text_encoder=text_encoder #CLIPTextModelWithProjection.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder', token=True).to(device=device, dtype=torch.bfloat16)
|
| 183 |
-
pipe.config.text_encoder_2=text_encoder_2 #CLIPTextModelWithProjection.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_2',token=True).to(device=device, dtype=torch.bfloat16)
|
| 184 |
-
pipe.config.text_encoder_3=text_encoder_3 #T5EncoderModel.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_3',token=True).to(device=device, dtype=torch.bfloat16)
|
| 185 |
seed = random.randint(0, MAX_SEED)
|
| 186 |
generator = torch.Generator(device='cuda').manual_seed(seed)
|
| 187 |
print('-- generating image --')
|
|
@@ -204,7 +193,6 @@ def infer_90(
|
|
| 204 |
sd35_path = f"sd35ll_{timestamp}.png"
|
| 205 |
sd_image.save(sd35_path,optimize=False,compress_level=0)
|
| 206 |
pyx.upload_to_ftp(sd35_path)
|
| 207 |
-
upscaler_2.to(torch.device('cuda'))
|
| 208 |
with torch.no_grad():
|
| 209 |
upscale = upscaler_2(sd_image, tiling=True, tile_width=256, tile_height=256)
|
| 210 |
upscale2 = upscaler_2(upscale, tiling=True, tile_width=256, tile_height=256)
|
|
@@ -227,11 +215,6 @@ def infer_110(
|
|
| 227 |
num_inference_steps,
|
| 228 |
progress=gr.Progress(track_tqdm=True),
|
| 229 |
):
|
| 230 |
-
pipe.vae=vaeX.to('cpu')
|
| 231 |
-
pipe.config.transformer=ll_transformer
|
| 232 |
-
pipe.config.text_encoder=text_encoder #CLIPTextModelWithProjection.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder', token=True).to(device=device, dtype=torch.bfloat16)
|
| 233 |
-
pipe.config.text_encoder_2=text_encoder_2 #CLIPTextModelWithProjection.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_2',token=True).to(device=device, dtype=torch.bfloat16)
|
| 234 |
-
pipe.config.text_encoder_3=text_encoder_3 #T5EncoderModel.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_3',token=True).to(device=device, dtype=torch.bfloat16)
|
| 235 |
seed = random.randint(0, MAX_SEED)
|
| 236 |
generator = torch.Generator(device='cuda').manual_seed(seed)
|
| 237 |
print('-- generating image --')
|
|
@@ -254,7 +237,6 @@ def infer_110(
|
|
| 254 |
sd35_path = f"sd35ll_{timestamp}.png"
|
| 255 |
sd_image.save(sd35_path,optimize=False,compress_level=0)
|
| 256 |
pyx.upload_to_ftp(sd35_path)
|
| 257 |
-
upscaler_2.to(torch.device('cuda'))
|
| 258 |
with torch.no_grad():
|
| 259 |
upscale = upscaler_2(sd_image, tiling=True, tile_width=256, tile_height=256)
|
| 260 |
upscale2 = upscaler_2(upscale, tiling=True, tile_width=256, tile_height=256)
|
|
|
|
| 86 |
vae=None,
|
| 87 |
#vae=AutoencoderKL.from_pretrained("ford442/stable-diffusion-3.5-large-fp32", use_safetensors=True, subfolder='vae',token=True),
|
| 88 |
#scheduler = FlowMatchHeunDiscreteScheduler.from_pretrained('ford442/stable-diffusion-3.5-large-bf16', subfolder='scheduler',token=True),
|
| 89 |
+
#text_encoder=None, #CLIPTextModelWithProjection.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder', token=True),
|
| 90 |
# text_encoder=CLIPTextModelWithProjection.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder', token=True),
|
| 91 |
+
#text_encoder_2=None, #CLIPTextModelWithProjection.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_2',token=True),
|
| 92 |
# text_encoder_2=CLIPTextModelWithProjection.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_2',token=True),
|
| 93 |
+
#text_encoder_3=None, #T5EncoderModel.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_3',token=True),
|
| 94 |
# text_encoder_3=T5EncoderModel.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_3',token=True),
|
| 95 |
#tokenizer=CLIPTokenizer.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", add_prefix_space=True, subfolder="tokenizer", token=True),
|
| 96 |
#tokenizer_2=CLIPTokenizer.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", add_prefix_space=True, subfolder="tokenizer_2", token=True),
|
|
|
|
| 99 |
#torch_dtype=torch.bfloat16,
|
| 100 |
use_safetensors=True,
|
| 101 |
)
|
| 102 |
+
#text_encoder=CLIPTextModelWithProjection.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder', token=True).to(torch.device("cuda:0"), dtype=torch.bfloat16)
|
| 103 |
+
#text_encoder_2=CLIPTextModelWithProjection.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_2',token=True).to(torch.device("cuda:0"), dtype=torch.bfloat16)
|
| 104 |
+
#text_encoder_3=T5EncoderModel.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_3',token=True).to(torch.device("cuda:0"), dtype=torch.bfloat16)
|
| 105 |
ll_transformer=SD3Transformer2DModel.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='transformer',token=True).to(torch.device("cuda:0"), dtype=torch.bfloat16)
|
| 106 |
+
pipe.transformer=ll_transformer
|
| 107 |
pipe.load_lora_weights("ford442/sdxl-vae-bf16", weight_name="LoRA/UltraReal.safetensors")
|
| 108 |
|
| 109 |
#pipe.to(accelerator.device)
|
| 110 |
pipe.to(device=device, dtype=torch.bfloat16)
|
| 111 |
|
| 112 |
+
upscaler_2 = UpscaleWithModel.from_pretrained("Kim2091/ClearRealityV1").to(torch.device('cuda'))
|
| 113 |
|
| 114 |
MAX_SEED = np.iinfo(np.int32).max
|
| 115 |
|
|
|
|
| 127 |
num_inference_steps,
|
| 128 |
progress=gr.Progress(track_tqdm=True),
|
| 129 |
):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 130 |
seed = random.randint(0, MAX_SEED)
|
| 131 |
generator = torch.Generator(device='cuda').manual_seed(seed)
|
| 132 |
print('-- generating image --')
|
|
|
|
| 149 |
sd35_path = f"sd35ll_{timestamp}.png"
|
| 150 |
sd_image.save(sd35_path,optimize=False,compress_level=0)
|
| 151 |
pyx.upload_to_ftp(sd35_path)
|
|
|
|
| 152 |
with torch.no_grad():
|
| 153 |
upscale = upscaler_2(sd_image, tiling=True, tile_width=256, tile_height=256)
|
| 154 |
upscale2 = upscaler_2(upscale, tiling=True, tile_width=256, tile_height=256)
|
|
|
|
| 171 |
num_inference_steps,
|
| 172 |
progress=gr.Progress(track_tqdm=True),
|
| 173 |
):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 174 |
seed = random.randint(0, MAX_SEED)
|
| 175 |
generator = torch.Generator(device='cuda').manual_seed(seed)
|
| 176 |
print('-- generating image --')
|
|
|
|
| 193 |
sd35_path = f"sd35ll_{timestamp}.png"
|
| 194 |
sd_image.save(sd35_path,optimize=False,compress_level=0)
|
| 195 |
pyx.upload_to_ftp(sd35_path)
|
|
|
|
| 196 |
with torch.no_grad():
|
| 197 |
upscale = upscaler_2(sd_image, tiling=True, tile_width=256, tile_height=256)
|
| 198 |
upscale2 = upscaler_2(upscale, tiling=True, tile_width=256, tile_height=256)
|
|
|
|
| 215 |
num_inference_steps,
|
| 216 |
progress=gr.Progress(track_tqdm=True),
|
| 217 |
):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 218 |
seed = random.randint(0, MAX_SEED)
|
| 219 |
generator = torch.Generator(device='cuda').manual_seed(seed)
|
| 220 |
print('-- generating image --')
|
|
|
|
| 237 |
sd35_path = f"sd35ll_{timestamp}.png"
|
| 238 |
sd_image.save(sd35_path,optimize=False,compress_level=0)
|
| 239 |
pyx.upload_to_ftp(sd35_path)
|
|
|
|
| 240 |
with torch.no_grad():
|
| 241 |
upscale = upscaler_2(sd_image, tiling=True, tile_width=256, tile_height=256)
|
| 242 |
upscale2 = upscaler_2(upscale, tiling=True, tile_width=256, tile_height=256)
|