Burman-AI committed
Commit deac0f2 · verified · Parent: ca30f98

Update app.py: run the try-on pipeline on CPU in float32 instead of CUDA in float16, disable the @spaces.GPU decorator and torch.cuda.amp.autocast, and comment out the example-list setup.

Files changed (1): app.py (+71 -71)
app.py CHANGED
@@ -49,8 +49,8 @@ example_path = os.path.join(os.path.dirname(__file__), 'example')
 unet = UNet2DConditionModel.from_pretrained(
     base_path,
     subfolder="unet",
-    torch_dtype=torch.float16,
-)
+    torch_dtype=torch.float32,  # Changed to float32
+).to("cpu")  # Moved to CPU
 unet.requires_grad_(False)
 tokenizer_one = AutoTokenizer.from_pretrained(
     base_path,
@@ -68,28 +68,28 @@ noise_scheduler = DDPMScheduler.from_pretrained(base_path, subfolder="scheduler")
 text_encoder_one = CLIPTextModel.from_pretrained(
     base_path,
     subfolder="text_encoder",
-    torch_dtype=torch.float16,
-)
+    torch_dtype=torch.float32,  # Changed to float32
+).to("cpu")  # Moved to CPU
 text_encoder_two = CLIPTextModelWithProjection.from_pretrained(
     base_path,
     subfolder="text_encoder_2",
-    torch_dtype=torch.float16,
-)
+    torch_dtype=torch.float32,  # Changed to float32
+).to("cpu")  # Moved to CPU
 image_encoder = CLIPVisionModelWithProjection.from_pretrained(
     base_path,
     subfolder="image_encoder",
-    torch_dtype=torch.float16,
-)
+    torch_dtype=torch.float32,  # Changed to float32
+).to("cpu")  # Moved to CPU
 vae = AutoencoderKL.from_pretrained(
     base_path,
     subfolder="vae",
-    torch_dtype=torch.float16,
-)
+    torch_dtype=torch.float32,  # Changed to float32
+).to("cpu")  # Moved to CPU
 UNet_Encoder = UNet2DConditionModel_ref.from_pretrained(
     base_path,
     subfolder="unet_encoder",
-    torch_dtype=torch.float16,
-)
+    torch_dtype=torch.float32,  # Changed to float32
+).to("cpu")  # Moved to CPU
 parsing_model = Parsing(0)
 openpose_model = OpenPose(0)
 UNet_Encoder.requires_grad_(False)
@@ -118,10 +118,10 @@ pipe = TryonPipeline.from_pretrained(
     tokenizer_2=tokenizer_two,
     scheduler=noise_scheduler,
     image_encoder=image_encoder,
-    torch_dtype=torch.float16,
+    torch_dtype=torch.float32,  # Changed to float32
 )
 pipe.unet_encoder = UNet_Encoder
-@spaces.GPU
+# @spaces.GPU  # Removed GPU decorator
 def start_tryon(dict, garm_img, garment_des, is_checked, is_checked_crop, denoise_steps, seed):
     """
     Performs the virtual try-on.
@@ -136,7 +136,7 @@ def start_tryon(dict, garm_img, garment_des, is_checked, is_checked_crop, denoise_steps, seed):
     Returns:
         A tuple containing the output image (PIL) and the mask (PIL).
     """
-    device = "cuda"
+    device = "cpu"  # Changed to CPU
     openpose_model.preprocessor.body_estimation.model.to(device)
     pipe.to(device)
     pipe.unet_encoder.to(device)
@@ -170,61 +170,61 @@ def start_tryon(dict, garm_img, garment_des, is_checked, is_checked_crop, denoise_steps, seed):
     human_img_arg = convert_PIL_to_numpy(human_img_arg, format="BGR")
     args = apply_net.create_argument_parser().parse_args(
         ('show', './configs/densepose_rcnn_R_50_FPN_s1x.yaml', './ckpt/densepose/model_final_162be9.pkl',
-         'dp_segm', '-v', '--opts', 'MODEL.DEVICE', 'cuda'))
+         'dp_segm', '-v', '--opts', 'MODEL.DEVICE', 'cpu'))  # Changed to CPU
     # verbosity = getattr(args, "verbosity", None)
     pose_img = args.func(args, human_img_arg)
     pose_img = pose_img[:, :, ::-1]
     pose_img = Image.fromarray(pose_img).resize((768, 1024))
     with torch.no_grad():
         # Extract the images
-        with torch.cuda.amp.autocast():
-            with torch.no_grad():
-                prompt = "model is wearing " + garment_des
-                negative_prompt = "monochrome, lowres, bad anatomy, worst quality, low quality"
-                with torch.inference_mode():
-                    (
-                        prompt_embeds,
-                        negative_prompt_embeds,
-                        pooled_prompt_embeds,
-                        negative_pooled_prompt_embeds,
-                    ) = pipe.encode_prompt(
-                        prompt,
-                        num_images_per_prompt=1,
-                        do_classifier_free_guidance=True,
-                        negative_prompt=negative_prompt,
-                    )
-                prompt = "a photo of " + garment_des
-                negative_prompt = "monochrome, lowres, bad anatomy, worst quality, low quality"
-                if not isinstance(prompt, List):
-                    prompt = [prompt] * 1
-                if not isinstance(negative_prompt, List):
-                    negative_prompt = [negative_prompt] * 1
-                with torch.inference_mode():
-                    (
-                        prompt_embeds_c,
-                        _,
-                        _,
-                        _,
-                    ) = pipe.encode_prompt(
-                        prompt,
-                        num_images_per_prompt=1,
-                        do_classifier_free_guidance=False,
-                        negative_prompt=negative_prompt,
-                    )
-                pose_img = tensor_transform(pose_img).unsqueeze(0).to(device, torch.float16)
-                garm_tensor = tensor_transform(garm_img).unsqueeze(0).to(device, torch.float16)
+        # with torch.cuda.amp.autocast():  # Removed autocast
+        with torch.no_grad():
+            prompt = "model is wearing " + garment_des
+            negative_prompt = "monochrome, lowres, bad anatomy, worst quality, low quality"
+            with torch.inference_mode():
+                (
+                    prompt_embeds,
+                    negative_prompt_embeds,
+                    pooled_prompt_embeds,
+                    negative_pooled_prompt_embeds,
+                ) = pipe.encode_prompt(
+                    prompt,
+                    num_images_per_prompt=1,
+                    do_classifier_free_guidance=True,
+                    negative_prompt=negative_prompt,
+                )
+            prompt = "a photo of " + garment_des
+            negative_prompt = "monochrome, lowres, bad anatomy, worst quality, low quality"
+            if not isinstance(prompt, List):
+                prompt = [prompt] * 1
+            if not isinstance(negative_prompt, List):
+                negative_prompt = [negative_prompt] * 1
+            with torch.inference_mode():
+                (
+                    prompt_embeds_c,
+                    _,
+                    _,
+                    _,
+                ) = pipe.encode_prompt(
+                    prompt,
+                    num_images_per_prompt=1,
+                    do_classifier_free_guidance=False,
+                    negative_prompt=negative_prompt,
+                )
+            pose_img = tensor_transform(pose_img).unsqueeze(0).to(device, torch.float32)  # Changed to float32
+            garm_tensor = tensor_transform(garm_img).unsqueeze(0).to(device, torch.float32)  # Changed to float32
         generator = torch.Generator(device).manual_seed(seed) if seed is not None else None
         images = pipe(
-            prompt_embeds=prompt_embeds.to(device, torch.float16),
-            negative_prompt_embeds=negative_prompt_embeds.to(device, torch.float16),
-            pooled_prompt_embeds=pooled_prompt_embeds.to(device, torch.float16),
-            negative_pooled_prompt_embeds=negative_pooled_prompt_embeds.to(device, torch.float16),
+            prompt_embeds=prompt_embeds.to(device, torch.float32),  # Changed to float32
+            negative_prompt_embeds=negative_prompt_embeds.to(device, torch.float32),  # Changed to float32
+            pooled_prompt_embeds=pooled_prompt_embeds.to(device, torch.float32),  # Changed to float32
+            negative_pooled_prompt_embeds=negative_pooled_prompt_embeds.to(device, torch.float32),  # Changed to float32
             num_inference_steps=denoise_steps,
             generator=generator,
             strength=1.0,
-            pose_img=pose_img.to(device, torch.float16),
-            text_embeds_cloth=prompt_embeds_c.to(device, torch.float16),
-            cloth=garm_tensor.to(device, torch.float16),
+            pose_img=pose_img.to(device, torch.float32),  # Changed to float32
+            text_embeds_cloth=prompt_embeds_c.to(device, torch.float32),  # Changed to float32
+            cloth=garm_tensor.to(device, torch.float32),  # Changed to float32
             mask_image=mask,
             image=human_img,
             height=1024,
@@ -241,18 +241,18 @@ def start_tryon(dict, garm_img, garment_des, is_checked, is_checked_crop, denoise_steps, seed):
     # return images[0], mask_gray
 # --- Gradio Interface ---
 # Default human examples
-human_ex_list = ''
-for ex_human in human_list_path:
-    ex_dict = {}
-    ex_dict['background'] = ex_human
-    ex_dict['layers'] = None
-    ex_dict['composite'] = None
-    human_ex_list.append(ex_dict)
+# human_ex_list = ''
+# for ex_human in human_list_path:
+#     ex_dict = {}
+#     ex_dict['background'] = ex_human
+#     ex_dict['layers'] = None
+#     ex_dict['composite'] = None
+#     human_ex_list.append(ex_dict)
 # Garment examples
-garm_list = os.listdir(os.path.join(example_path, "cloth"))
-garm_list_path = [os.path.join(example_path, "cloth", garm) for garm in garm_list]
-human_list = os.listdir(os.path.join(example_path, "human"))
-human_list_path = [os.path.join(example_path, "human", human) for human in human_list]
+# garm_list = os.listdir(os.path.join(example_path, "cloth"))
+# garm_list_path = [os.path.join(example_path, "cloth", garm) for garm in garm_list]
+# human_list = os.listdir(os.path.join(example_path, "human"))
+# human_list_path = [os.path.join(example_path, "human", human) for human in human_list]
 image_blocks = gr.Blocks(theme="Nymbo/Alyx_Theme").queue()
 with image_blocks as demo:
     gr.HTML("<center><h1>Virtual Try-On</h1></center>")
@@ -297,4 +297,4 @@ with image_blocks as demo:
                 inputs=[imgs, garm_img, prompt, is_checked,
                         is_checked_crop, denoise_steps, seed],
                 outputs=[image_out, masked_img], api_name='tryon')
-image_blocks.launch()
+image_blocks.launch()
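
Note: the new version pins everything to device = "cpu" and torch.float32, which lets the Space run without a GPU but gives up CUDA when one is available. A minimal, hypothetical sketch of a device-portable alternative (not part of this commit), reusing base_path and the model classes from the diff above:

import torch

# Pick device and dtype once, then reuse them everywhere the diff hardcodes
# "cpu" / torch.float32. float16 is only well supported on CUDA; CPU
# inference should stay in float32.
device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.float16 if device == "cuda" else torch.float32

unet = UNet2DConditionModel.from_pretrained(
    base_path,
    subfolder="unet",
    torch_dtype=dtype,
).to(device)

The same device/dtype pair would replace the hardcoded values inside start_tryon and the 'cpu' literal passed to DensePose via MODEL.DEVICE.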
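
Separately, the example-list block this commit comments out had two latent bugs: human_ex_list was initialized as the string '' (which has no .append method), and human_list_path was referenced before it was defined. A hedged sketch of how that block could be restored, assuming the same example_path layout:

import os

# Build the path lists first; the original code used human_list_path
# before defining it.
garm_list = os.listdir(os.path.join(example_path, "cloth"))
garm_list_path = [os.path.join(example_path, "cloth", garm) for garm in garm_list]
human_list = os.listdir(os.path.join(example_path, "human"))
human_list_path = [os.path.join(example_path, "human", human) for human in human_list]

# Start as a list, not '': strings have no .append().
human_ex_list = []
for ex_human in human_list_path:
    human_ex_list.append({
        "background": ex_human,
        "layers": None,
        "composite": None,
    })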