1inkusFace committed on
Commit
66478d5
·
verified ·
1 Parent(s): 229742d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -36
app.py CHANGED
@@ -15,9 +15,7 @@ from models.transformer_sd3 import SD3Transformer2DModel
15
  #from diffusers import StableDiffusion3Pipeline
16
  from transformers import CLIPTextModelWithProjection, T5EncoderModel
17
  from transformers import CLIPTokenizer, T5TokenizerFast
18
- #from diffusers import SD3Transformer2DModel, AutoencoderKL
19
  from diffusers import AutoencoderKL
20
- #from models.transformer_sd3 import SD3Transformer2DModel
21
  from pipeline_stable_diffusion_3_ipa import StableDiffusion3Pipeline
22
 
23
  from image_gen_aux import UpscaleWithModel
@@ -36,6 +34,7 @@ torch.backends.cudnn.deterministic = False
36
  torch.backends.cudnn.benchmark = False
37
  #torch.backends.cuda.preferred_blas_library="cublas"
38
  #torch.backends.cuda.preferred_linalg_library="cusolver"
 
39
 
40
  hftoken = os.getenv("HF_TOKEN")
41
 
@@ -58,37 +57,40 @@ def upload_to_ftp(filename):
58
  device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
59
  torch_dtype = torch.bfloat16
60
 
61
- transformer = SD3Transformer2DModel.from_pretrained(
62
- model_path, subfolder="transformer", torch_dtype=torch.bfloat16
63
- )
64
-
65
- vaeX=AutoencoderKL.from_pretrained("ford442/stable-diffusion-3.5-large-fp32", safety_checker=None, use_safetensors=True, low_cpu_mem_usage=False, subfolder='vae', torch_dtype=torch.float32, token=True)
66
-
67
- pipe = StableDiffusion3Pipeline.from_pretrained(
68
- #"stabilityai # stable-diffusion-3.5-large",
69
- "ford442/stable-diffusion-3.5-large-bf16",
70
- #scheduler = FlowMatchHeunDiscreteScheduler.from_pretrained('ford442/stable-diffusion-3.5-large-bf16', subfolder='scheduler',token=True),
71
- text_encoder=None, #CLIPTextModelWithProjection.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder', token=True),
72
- text_encoder_2=None, #CLIPTextModelWithProjection.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_2',token=True),
73
- text_encoder_3=None, #T5EncoderModel.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_3',token=True),
74
- #tokenizer=CLIPTokenizer.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", add_prefix_space=True, subfolder="tokenizer", token=True),
75
- #tokenizer_2=CLIPTokenizer.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", add_prefix_space=True, subfolder="tokenizer_2", token=True),
76
- tokenizer_3=T5TokenizerFast.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", use_fast=True, subfolder="tokenizer_3", token=True),
77
- torch_dtype=torch.bfloat16,
78
- transformer=transformer,
79
- vae=None
80
- #use_safetensors=False,
81
- )
82
-
83
- #pipe.to(device=device, dtype=torch.bfloat16)
 
 
 
84
 
85
- pipe.to(device)
86
- pipe.vae=vaeX.to(device)
87
  text_encoder=CLIPTextModelWithProjection.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder', token=True).to(device=device, dtype=torch.bfloat16)
88
  text_encoder_2=CLIPTextModelWithProjection.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_2',token=True).to(device=device, dtype=torch.bfloat16)
89
  text_encoder_3=T5EncoderModel.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_3',token=True).to(device=device, dtype=torch.bfloat16)
90
-
91
 
 
 
 
92
 
93
  upscaler_2 = UpscaleWithModel.from_pretrained("Kim2091/ClearRealityV1").to(torch.device("cuda:0"))
94
 
@@ -120,18 +122,17 @@ def infer(
120
  image_encoder_path=None,
121
  progress=gr.Progress(track_tqdm=True),
122
  ):
123
-
124
  pipe.text_encoder=text_encoder
125
  pipe.text_encoder_2=text_encoder_2
126
  pipe.text_encoder_3=text_encoder_3
127
-
128
  pipe.init_ipadapter(
129
  ip_adapter_path=ipadapter_path,
130
  image_encoder_path=image_encoder_path,
131
  nb_token=64,
132
  )
133
  upscaler_2.to(torch.device('cpu'))
134
- torch.set_float32_matmul_precision("highest")
 
135
  seed = random.randint(0, MAX_SEED)
136
  generator = torch.Generator(device='cuda').manual_seed(seed)
137
  enhanced_prompt = prompt
@@ -140,25 +141,25 @@ def infer(
140
  sd_image_a = Image.open(latent_file.name).convert('RGB')
141
  print("-- using image file and loading ip-adapter --")
142
  #sd_image_a.resize((height,width), Image.LANCZOS)
143
- sd_image_a.resize((768,768), Image.LANCZOS)
144
  if latent_file_2 is not None: # Check if a latent file is provided
145
  sd_image_b = Image.open(latent_file_2.name).convert('RGB')
146
- sd_image_b.resize((768,768), Image.LANCZOS)
147
  else:
148
  sd_image_b = None
149
  if latent_file_3 is not None: # Check if a latent file is provided
150
  sd_image_c = Image.open(latent_file_3.name).convert('RGB')
151
- sd_image_c.resize((768,768), Image.LANCZOS)
152
  else:
153
  sd_image_c = None
154
  if latent_file_4 is not None: # Check if a latent file is provided
155
  sd_image_d = Image.open(latent_file_4.name).convert('RGB')
156
- sd_image_d.resize((768,768), Image.LANCZOS)
157
  else:
158
  sd_image_d = None
159
  if latent_file_5 is not None: # Check if a latent file is provided
160
  sd_image_e = Image.open(latent_file_5.name).convert('RGB')
161
- sd_image_e.resize((768,768), Image.LANCZOS)
162
  else:
163
  sd_image_e = None
164
  print('-- generating image --')
 
15
  #from diffusers import StableDiffusion3Pipeline
16
  from transformers import CLIPTextModelWithProjection, T5EncoderModel
17
  from transformers import CLIPTokenizer, T5TokenizerFast
 
18
  from diffusers import AutoencoderKL
 
19
  from pipeline_stable_diffusion_3_ipa import StableDiffusion3Pipeline
20
 
21
  from image_gen_aux import UpscaleWithModel
 
34
  torch.backends.cudnn.benchmark = False
35
  #torch.backends.cuda.preferred_blas_library="cublas"
36
  #torch.backends.cuda.preferred_linalg_library="cusolver"
37
+ torch.set_float32_matmul_precision("highest")
38
 
39
  hftoken = os.getenv("HF_TOKEN")
40
 
 
57
  device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
58
  torch_dtype = torch.bfloat16
59
 
60
+ def load_and_prepare_models():
61
+ transformer = SD3Transformer2DModel.from_pretrained(
62
+ model_path, subfolder="transformer" #, torch_dtype=torch.bfloat16
63
+ )
64
+ vaeX=AutoencoderKL.from_pretrained("ford442/stable-diffusion-3.5-large-fp32", safety_checker=None, use_safetensors=True, low_cpu_mem_usage=False, subfolder='vae', torch_dtype=torch.float32, token=True)
65
+ pipe = StableDiffusion3Pipeline.from_pretrained(
66
+ #"stabilityai # stable-diffusion-3.5-large",
67
+ "ford442/stable-diffusion-3.5-large-bf16",
68
+ #scheduler = FlowMatchHeunDiscreteScheduler.from_pretrained('ford442/stable-diffusion-3.5-large-bf16', subfolder='scheduler',token=True),
69
+ text_encoder=None, #CLIPTextModelWithProjection.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder', token=True),
70
+ text_encoder_2=None, #CLIPTextModelWithProjection.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_2',token=True),
71
+ text_encoder_3=None, #T5EncoderModel.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_3',token=True),
72
+ #tokenizer=CLIPTokenizer.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", add_prefix_space=True, subfolder="tokenizer", token=True),
73
+ #tokenizer_2=CLIPTokenizer.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", add_prefix_space=True, subfolder="tokenizer_2", token=True),
74
+ tokenizer_3=T5TokenizerFast.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", use_fast=True, subfolder="tokenizer_3", token=True),
75
+ #torch_dtype=torch.bfloat16,
76
+ transformer=transformer,
77
+ vae=None
78
+ #use_safetensors=False,
79
+ )
80
+ torch.cuda.empty_cache()
81
+ torch.cuda.reset_peak_memory_stats()
82
+ pipe.to(device=device, dtype=torch.bfloat16)
83
+ pipe.vae=vaeX.to(device)
84
+ upscaler = UpscaleWithModel.from_pretrained("Kim2091/ClearRealityV1").to(torch.device("cuda:0"))
85
+ return pipe, upscaler
86
 
 
 
87
  text_encoder=CLIPTextModelWithProjection.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder', token=True).to(device=device, dtype=torch.bfloat16)
88
  text_encoder_2=CLIPTextModelWithProjection.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_2',token=True).to(device=device, dtype=torch.bfloat16)
89
  text_encoder_3=T5EncoderModel.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_3',token=True).to(device=device, dtype=torch.bfloat16)
 
90
 
91
+ pipe, upscaler_2 = load_and_prepare_models()
92
+
93
+ #pipe.to(device)
94
 
95
  upscaler_2 = UpscaleWithModel.from_pretrained("Kim2091/ClearRealityV1").to(torch.device("cuda:0"))
96
 
 
122
  image_encoder_path=None,
123
  progress=gr.Progress(track_tqdm=True),
124
  ):
 
125
  pipe.text_encoder=text_encoder
126
  pipe.text_encoder_2=text_encoder_2
127
  pipe.text_encoder_3=text_encoder_3
 
128
  pipe.init_ipadapter(
129
  ip_adapter_path=ipadapter_path,
130
  image_encoder_path=image_encoder_path,
131
  nb_token=64,
132
  )
133
  upscaler_2.to(torch.device('cpu'))
134
+ torch.cuda.empty_cache()
135
+ torch.cuda.reset_peak_memory_stats()
136
  seed = random.randint(0, MAX_SEED)
137
  generator = torch.Generator(device='cuda').manual_seed(seed)
138
  enhanced_prompt = prompt
 
141
  sd_image_a = Image.open(latent_file.name).convert('RGB')
142
  print("-- using image file and loading ip-adapter --")
143
  #sd_image_a.resize((height,width), Image.LANCZOS)
144
+ sd_image_a.resize((width,height), Image.LANCZOS)
145
  if latent_file_2 is not None: # Check if a latent file is provided
146
  sd_image_b = Image.open(latent_file_2.name).convert('RGB')
147
+ sd_image_b.resize((768,height), Image.LANCZOS)
148
  else:
149
  sd_image_b = None
150
  if latent_file_3 is not None: # Check if a latent file is provided
151
  sd_image_c = Image.open(latent_file_3.name).convert('RGB')
152
+ sd_image_c.resize((width,height), Image.LANCZOS)
153
  else:
154
  sd_image_c = None
155
  if latent_file_4 is not None: # Check if a latent file is provided
156
  sd_image_d = Image.open(latent_file_4.name).convert('RGB')
157
+ sd_image_d.resize((width,height), Image.LANCZOS)
158
  else:
159
  sd_image_d = None
160
  if latent_file_5 is not None: # Check if a latent file is provided
161
  sd_image_e = Image.open(latent_file_5.name).convert('RGB')
162
+ sd_image_e.resize((width,height), Image.LANCZOS)
163
  else:
164
  sd_image_e = None
165
  print('-- generating image --')