Update app.py
Browse files
app.py
CHANGED
|
@@ -86,11 +86,11 @@ pipe = StableDiffusion3Pipeline.from_pretrained(
|
|
| 86 |
vae=None,
|
| 87 |
#vae=AutoencoderKL.from_pretrained("ford442/stable-diffusion-3.5-large-fp32", use_safetensors=True, subfolder='vae',token=True),
|
| 88 |
#scheduler = FlowMatchHeunDiscreteScheduler.from_pretrained('ford442/stable-diffusion-3.5-large-bf16', subfolder='scheduler',token=True),
|
| 89 |
-
text_encoder=None, #CLIPTextModelWithProjection.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder', token=True),
|
| 90 |
# text_encoder=CLIPTextModelWithProjection.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder', token=True),
|
| 91 |
-
text_encoder_2=None, #CLIPTextModelWithProjection.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_2',token=True),
|
| 92 |
# text_encoder_2=CLIPTextModelWithProjection.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_2',token=True),
|
| 93 |
-
text_encoder_3=None, #T5EncoderModel.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_3',token=True),
|
| 94 |
# text_encoder_3=T5EncoderModel.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_3',token=True),
|
| 95 |
#tokenizer=CLIPTokenizer.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", add_prefix_space=True, subfolder="tokenizer", token=True),
|
| 96 |
#tokenizer_2=CLIPTokenizer.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", add_prefix_space=True, subfolder="tokenizer_2", token=True),
|
|
@@ -99,17 +99,17 @@ pipe = StableDiffusion3Pipeline.from_pretrained(
|
|
| 99 |
#torch_dtype=torch.bfloat16,
|
| 100 |
use_safetensors=True,
|
| 101 |
)
|
| 102 |
-
text_encoder=CLIPTextModelWithProjection.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder', token=True).to(torch.device("cuda:0"), dtype=torch.bfloat16)
|
| 103 |
-
text_encoder_2=CLIPTextModelWithProjection.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_2',token=True).to(torch.device("cuda:0"), dtype=torch.bfloat16)
|
| 104 |
-
text_encoder_3=T5EncoderModel.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_3',token=True).to(torch.device("cuda:0"), dtype=torch.bfloat16)
|
| 105 |
ll_transformer=SD3Transformer2DModel.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='transformer',token=True).to(torch.device("cuda:0"), dtype=torch.bfloat16)
|
| 106 |
-
pipe.transformer=ll_transformer
|
| 107 |
pipe.load_lora_weights("ford442/sdxl-vae-bf16", weight_name="LoRA/UltraReal.safetensors")
|
| 108 |
|
| 109 |
#pipe.to(accelerator.device)
|
| 110 |
pipe.to(device=device, dtype=torch.bfloat16)
|
| 111 |
|
| 112 |
-
upscaler_2 = UpscaleWithModel.from_pretrained("Kim2091/ClearRealityV1").to(torch.device('cpu'))
|
| 113 |
|
| 114 |
MAX_SEED = np.iinfo(np.int32).max
|
| 115 |
|
|
@@ -127,11 +127,6 @@ def infer_60(
|
|
| 127 |
num_inference_steps,
|
| 128 |
progress=gr.Progress(track_tqdm=True),
|
| 129 |
):
|
| 130 |
-
pipe.vae=vaeX.to('cpu')
|
| 131 |
-
pipe.config.transformer=ll_transformer
|
| 132 |
-
pipe.config.text_encoder=text_encoder #CLIPTextModelWithProjection.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder', token=True).to(device=device, dtype=torch.bfloat16)
|
| 133 |
-
pipe.config.text_encoder_2=text_encoder_2 #CLIPTextModelWithProjection.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_2',token=True).to(device=device, dtype=torch.bfloat16)
|
| 134 |
-
pipe.config.text_encoder_3=text_encoder_3 #T5EncoderModel.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_3',token=True).to(device=device, dtype=torch.bfloat16)
|
| 135 |
seed = random.randint(0, MAX_SEED)
|
| 136 |
generator = torch.Generator(device='cuda').manual_seed(seed)
|
| 137 |
print('-- generating image --')
|
|
@@ -154,7 +149,6 @@ def infer_60(
|
|
| 154 |
sd35_path = f"sd35ll_{timestamp}.png"
|
| 155 |
sd_image.save(sd35_path,optimize=False,compress_level=0)
|
| 156 |
pyx.upload_to_ftp(sd35_path)
|
| 157 |
-
upscaler_2.to(torch.device('cuda'))
|
| 158 |
with torch.no_grad():
|
| 159 |
upscale = upscaler_2(sd_image, tiling=True, tile_width=256, tile_height=256)
|
| 160 |
upscale2 = upscaler_2(upscale, tiling=True, tile_width=256, tile_height=256)
|
|
@@ -177,11 +171,6 @@ def infer_90(
|
|
| 177 |
num_inference_steps,
|
| 178 |
progress=gr.Progress(track_tqdm=True),
|
| 179 |
):
|
| 180 |
-
pipe.vae=vaeX.to('cpu')
|
| 181 |
-
pipe.config.transformer=ll_transformer
|
| 182 |
-
pipe.config.text_encoder=text_encoder #CLIPTextModelWithProjection.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder', token=True).to(device=device, dtype=torch.bfloat16)
|
| 183 |
-
pipe.config.text_encoder_2=text_encoder_2 #CLIPTextModelWithProjection.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_2',token=True).to(device=device, dtype=torch.bfloat16)
|
| 184 |
-
pipe.config.text_encoder_3=text_encoder_3 #T5EncoderModel.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_3',token=True).to(device=device, dtype=torch.bfloat16)
|
| 185 |
seed = random.randint(0, MAX_SEED)
|
| 186 |
generator = torch.Generator(device='cuda').manual_seed(seed)
|
| 187 |
print('-- generating image --')
|
|
@@ -204,7 +193,6 @@ def infer_90(
|
|
| 204 |
sd35_path = f"sd35ll_{timestamp}.png"
|
| 205 |
sd_image.save(sd35_path,optimize=False,compress_level=0)
|
| 206 |
pyx.upload_to_ftp(sd35_path)
|
| 207 |
-
upscaler_2.to(torch.device('cuda'))
|
| 208 |
with torch.no_grad():
|
| 209 |
upscale = upscaler_2(sd_image, tiling=True, tile_width=256, tile_height=256)
|
| 210 |
upscale2 = upscaler_2(upscale, tiling=True, tile_width=256, tile_height=256)
|
|
@@ -227,11 +215,6 @@ def infer_110(
|
|
| 227 |
num_inference_steps,
|
| 228 |
progress=gr.Progress(track_tqdm=True),
|
| 229 |
):
|
| 230 |
-
pipe.vae=vaeX.to('cpu')
|
| 231 |
-
pipe.config.transformer=ll_transformer
|
| 232 |
-
pipe.config.text_encoder=text_encoder #CLIPTextModelWithProjection.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder', token=True).to(device=device, dtype=torch.bfloat16)
|
| 233 |
-
pipe.config.text_encoder_2=text_encoder_2 #CLIPTextModelWithProjection.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_2',token=True).to(device=device, dtype=torch.bfloat16)
|
| 234 |
-
pipe.config.text_encoder_3=text_encoder_3 #T5EncoderModel.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_3',token=True).to(device=device, dtype=torch.bfloat16)
|
| 235 |
seed = random.randint(0, MAX_SEED)
|
| 236 |
generator = torch.Generator(device='cuda').manual_seed(seed)
|
| 237 |
print('-- generating image --')
|
|
@@ -254,7 +237,6 @@ def infer_110(
|
|
| 254 |
sd35_path = f"sd35ll_{timestamp}.png"
|
| 255 |
sd_image.save(sd35_path,optimize=False,compress_level=0)
|
| 256 |
pyx.upload_to_ftp(sd35_path)
|
| 257 |
-
upscaler_2.to(torch.device('cuda'))
|
| 258 |
with torch.no_grad():
|
| 259 |
upscale = upscaler_2(sd_image, tiling=True, tile_width=256, tile_height=256)
|
| 260 |
upscale2 = upscaler_2(upscale, tiling=True, tile_width=256, tile_height=256)
|
|
|
|
| 86 |
vae=None,
|
| 87 |
#vae=AutoencoderKL.from_pretrained("ford442/stable-diffusion-3.5-large-fp32", use_safetensors=True, subfolder='vae',token=True),
|
| 88 |
#scheduler = FlowMatchHeunDiscreteScheduler.from_pretrained('ford442/stable-diffusion-3.5-large-bf16', subfolder='scheduler',token=True),
|
| 89 |
+
#text_encoder=None, #CLIPTextModelWithProjection.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder', token=True),
|
| 90 |
# text_encoder=CLIPTextModelWithProjection.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder', token=True),
|
| 91 |
+
#text_encoder_2=None, #CLIPTextModelWithProjection.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_2',token=True),
|
| 92 |
# text_encoder_2=CLIPTextModelWithProjection.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_2',token=True),
|
| 93 |
+
#text_encoder_3=None, #T5EncoderModel.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_3',token=True),
|
| 94 |
# text_encoder_3=T5EncoderModel.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_3',token=True),
|
| 95 |
#tokenizer=CLIPTokenizer.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", add_prefix_space=True, subfolder="tokenizer", token=True),
|
| 96 |
#tokenizer_2=CLIPTokenizer.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", add_prefix_space=True, subfolder="tokenizer_2", token=True),
|
|
|
|
| 99 |
#torch_dtype=torch.bfloat16,
|
| 100 |
use_safetensors=True,
|
| 101 |
)
|
| 102 |
+
#text_encoder=CLIPTextModelWithProjection.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder', token=True).to(torch.device("cuda:0"), dtype=torch.bfloat16)
|
| 103 |
+
#text_encoder_2=CLIPTextModelWithProjection.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_2',token=True).to(torch.device("cuda:0"), dtype=torch.bfloat16)
|
| 104 |
+
#text_encoder_3=T5EncoderModel.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_3',token=True).to(torch.device("cuda:0"), dtype=torch.bfloat16)
|
| 105 |
ll_transformer=SD3Transformer2DModel.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='transformer',token=True).to(torch.device("cuda:0"), dtype=torch.bfloat16)
|
| 106 |
+
pipe.transformer=ll_transformer
|
| 107 |
pipe.load_lora_weights("ford442/sdxl-vae-bf16", weight_name="LoRA/UltraReal.safetensors")
|
| 108 |
|
| 109 |
#pipe.to(accelerator.device)
|
| 110 |
pipe.to(device=device, dtype=torch.bfloat16)
|
| 111 |
|
| 112 |
+
upscaler_2 = UpscaleWithModel.from_pretrained("Kim2091/ClearRealityV1").to(torch.device('cuda'))
|
| 113 |
|
| 114 |
MAX_SEED = np.iinfo(np.int32).max
|
| 115 |
|
|
|
|
| 127 |
num_inference_steps,
|
| 128 |
progress=gr.Progress(track_tqdm=True),
|
| 129 |
):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 130 |
seed = random.randint(0, MAX_SEED)
|
| 131 |
generator = torch.Generator(device='cuda').manual_seed(seed)
|
| 132 |
print('-- generating image --')
|
|
|
|
| 149 |
sd35_path = f"sd35ll_{timestamp}.png"
|
| 150 |
sd_image.save(sd35_path,optimize=False,compress_level=0)
|
| 151 |
pyx.upload_to_ftp(sd35_path)
|
|
|
|
| 152 |
with torch.no_grad():
|
| 153 |
upscale = upscaler_2(sd_image, tiling=True, tile_width=256, tile_height=256)
|
| 154 |
upscale2 = upscaler_2(upscale, tiling=True, tile_width=256, tile_height=256)
|
|
|
|
| 171 |
num_inference_steps,
|
| 172 |
progress=gr.Progress(track_tqdm=True),
|
| 173 |
):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 174 |
seed = random.randint(0, MAX_SEED)
|
| 175 |
generator = torch.Generator(device='cuda').manual_seed(seed)
|
| 176 |
print('-- generating image --')
|
|
|
|
| 193 |
sd35_path = f"sd35ll_{timestamp}.png"
|
| 194 |
sd_image.save(sd35_path,optimize=False,compress_level=0)
|
| 195 |
pyx.upload_to_ftp(sd35_path)
|
|
|
|
| 196 |
with torch.no_grad():
|
| 197 |
upscale = upscaler_2(sd_image, tiling=True, tile_width=256, tile_height=256)
|
| 198 |
upscale2 = upscaler_2(upscale, tiling=True, tile_width=256, tile_height=256)
|
|
|
|
| 215 |
num_inference_steps,
|
| 216 |
progress=gr.Progress(track_tqdm=True),
|
| 217 |
):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 218 |
seed = random.randint(0, MAX_SEED)
|
| 219 |
generator = torch.Generator(device='cuda').manual_seed(seed)
|
| 220 |
print('-- generating image --')
|
|
|
|
| 237 |
sd35_path = f"sd35ll_{timestamp}.png"
|
| 238 |
sd_image.save(sd35_path,optimize=False,compress_level=0)
|
| 239 |
pyx.upload_to_ftp(sd35_path)
|
|
|
|
| 240 |
with torch.no_grad():
|
| 241 |
upscale = upscaler_2(sd_image, tiling=True, tile_width=256, tile_height=256)
|
| 242 |
upscale2 = upscaler_2(upscale, tiling=True, tile_width=256, tile_height=256)
|