Spaces:
Runtime error
Update app.py
app.py CHANGED
@@ -49,8 +49,8 @@ example_path = os.path.join(os.path.dirname(__file__), 'example')
 unet = UNet2DConditionModel.from_pretrained(
     base_path,
     subfolder="unet",
-    torch_dtype=torch.float16,
-)
+    torch_dtype=torch.float32,  # Changed to float32
+).to("cpu")  # Moved to CPU
 unet.requires_grad_(False)
 tokenizer_one = AutoTokenizer.from_pretrained(
     base_path,
@@ -68,28 +68,28 @@ noise_scheduler = DDPMScheduler.from_pretrained(base_path, subfolder="scheduler")
 text_encoder_one = CLIPTextModel.from_pretrained(
     base_path,
     subfolder="text_encoder",
-    torch_dtype=torch.float16,
-)
+    torch_dtype=torch.float32,  # Changed to float32
+).to("cpu")  # Moved to CPU
 text_encoder_two = CLIPTextModelWithProjection.from_pretrained(
     base_path,
     subfolder="text_encoder_2",
-    torch_dtype=torch.float16,
-)
+    torch_dtype=torch.float32,  # Changed to float32
+).to("cpu")  # Moved to CPU
 image_encoder = CLIPVisionModelWithProjection.from_pretrained(
     base_path,
     subfolder="image_encoder",
-    torch_dtype=torch.float16,
-)
+    torch_dtype=torch.float32,  # Changed to float32
+).to("cpu")  # Moved to CPU
 vae = AutoencoderKL.from_pretrained(
     base_path,
     subfolder="vae",
-    torch_dtype=torch.float16,
-)
+    torch_dtype=torch.float32,  # Changed to float32
+).to("cpu")  # Moved to CPU
 UNet_Encoder = UNet2DConditionModel_ref.from_pretrained(
     base_path,
     subfolder="unet_encoder",
-    torch_dtype=torch.float16,
-)
+    torch_dtype=torch.float32,  # Changed to float32
+).to("cpu")  # Moved to CPU
 parsing_model = Parsing(0)
 openpose_model = OpenPose(0)
 UNet_Encoder.requires_grad_(False)
@@ -118,10 +118,10 @@ pipe = TryonPipeline.from_pretrained(
     tokenizer_2=tokenizer_two,
     scheduler=noise_scheduler,
     image_encoder=image_encoder,
-    torch_dtype=torch.float16,
+    torch_dtype=torch.float32,  # Changed to float32
 )
 pipe.unet_encoder = UNet_Encoder
-@spaces.GPU
+#@spaces.GPU  # Removed GPU decorator
 def start_tryon(dict, garm_img, garment_des, is_checked, is_checked_crop, denoise_steps, seed):
     """
     Performs the virtual try-on.
@@ -136,7 +136,7 @@ def start_tryon(dict, garm_img, garment_des, is_checked, is_checked_crop, denoise_steps, seed):
     Returns:
         A tuple containing the output image (PIL) and the mask (PIL).
     """
-    device = "cuda"
+    device = "cpu"  # Changed to CPU
    openpose_model.preprocessor.body_estimation.model.to(device)
     pipe.to(device)
     pipe.unet_encoder.to(device)
@@ -170,61 +170,61 @@ def start_tryon(dict, garm_img, garment_des, is_checked, is_checked_crop, denoise_steps, seed):
     human_img_arg = convert_PIL_to_numpy(human_img_arg, format="BGR")
     args = apply_net.create_argument_parser().parse_args(
         ('show', './configs/densepose_rcnn_R_50_FPN_s1x.yaml', './ckpt/densepose/model_final_162be9.pkl',
-         'dp_segm', '-v', '--opts', 'MODEL.DEVICE', 'cuda'))
+         'dp_segm', '-v', '--opts', 'MODEL.DEVICE', 'cpu'))  # Changed to CPU
     # verbosity = getattr(args, "verbosity", None)
     pose_img = args.func(args, human_img_arg)
     pose_img = pose_img[:, :, ::-1]
     pose_img = Image.fromarray(pose_img).resize((768, 1024))
     with torch.no_grad():
         # Extract the images
-        with torch.cuda.amp.autocast():
-        … (old lines 181-213: previous prompt-encoding block, not shown)
-            pose_img = tensor_transform(pose_img).unsqueeze(0).to(device, torch.float16)
-            garm_tensor = tensor_transform(garm_img).unsqueeze(0).to(device, torch.float16)
+        #with torch.cuda.amp.autocast():  # Removed autocast
+        with torch.no_grad():
+            prompt = "model is wearing " + garment_des
+            negative_prompt = "monochrome, lowres, bad anatomy, worst quality, low quality"
+            with torch.inference_mode():
+                (
+                    prompt_embeds,
+                    negative_prompt_embeds,
+                    pooled_prompt_embeds,
+                    negative_pooled_prompt_embeds,
+                ) = pipe.encode_prompt(
+                    prompt,
+                    num_images_per_prompt=1,
+                    do_classifier_free_guidance=True,
+                    negative_prompt=negative_prompt,
+                )
+            prompt = "a photo of " + garment_des
+            negative_prompt = "monochrome, lowres, bad anatomy, worst quality, low quality"
+            if not isinstance(prompt, List):
+                prompt = [prompt] * 1
+            if not isinstance(negative_prompt, List):
+                negative_prompt = [negative_prompt] * 1
+            with torch.inference_mode():
+                (
+                    prompt_embeds_c,
+                    _,
+                    _,
+                    _,
+                ) = pipe.encode_prompt(
+                    prompt,
+                    num_images_per_prompt=1,
+                    do_classifier_free_guidance=False,
+                    negative_prompt=negative_prompt,
+                )
+            pose_img = tensor_transform(pose_img).unsqueeze(0).to(device, torch.float32)  # Changed to float32
+            garm_tensor = tensor_transform(garm_img).unsqueeze(0).to(device, torch.float32)  # Changed to float32
             generator = torch.Generator(device).manual_seed(seed) if seed is not None else None
             images = pipe(
-                prompt_embeds=prompt_embeds.to(device, torch.float16),
-                negative_prompt_embeds=negative_prompt_embeds.to(device, torch.float16),
-                pooled_prompt_embeds=pooled_prompt_embeds.to(device, torch.float16),
-                negative_pooled_prompt_embeds=negative_pooled_prompt_embeds.to(device, torch.float16),
+                prompt_embeds=prompt_embeds.to(device, torch.float32),  # Changed to float32
+                negative_prompt_embeds=negative_prompt_embeds.to(device, torch.float32),  # Changed to float32
+                pooled_prompt_embeds=pooled_prompt_embeds.to(device, torch.float32),  # Changed to float32
+                negative_pooled_prompt_embeds=negative_pooled_prompt_embeds.to(device, torch.float32),  # Changed to float32
                 num_inference_steps=denoise_steps,
                 generator=generator,
                 strength=1.0,
-                pose_img=pose_img.to(device, torch.float16),
-                text_embeds_cloth=prompt_embeds_c.to(device, torch.float16),
-                cloth=garm_tensor.to(device, torch.float16),
+                pose_img=pose_img.to(device, torch.float32),  # Changed to float32
+                text_embeds_cloth=prompt_embeds_c.to(device, torch.float32),  # Changed to float32
+                cloth=garm_tensor.to(device, torch.float32),  # Changed to float32
                 mask_image=mask,
                 image=human_img,
                 height=1024,
@@ -241,18 +241,18 @@ def start_tryon(dict, garm_img, garment_des, is_checked, is_checked_crop, denoise_steps, seed):
 # return images[0], mask_gray
 # --- Gradio Interface ---
 # Default human examples
-human_ex_list = []
-for ex_human in human_list_path:
-    ex_dict = {}
-    ex_dict['background'] = ex_human
-    ex_dict['layers'] = None
-    ex_dict['composite'] = None
-    human_ex_list.append(ex_dict)
+# human_ex_list =''
+# for ex_human in human_list_path:
+#     ex_dict = {}
+#     ex_dict['background'] = ex_human
+#     ex_dict['layers'] = None
+#     ex_dict['composite'] = None
+#     human_ex_list.append(ex_dict)
 # Garment examples
-garm_list = os.listdir(os.path.join(example_path, "cloth"))
-garm_list_path = [os.path.join(example_path, "cloth", garm) for garm in garm_list]
-human_list = os.listdir(os.path.join(example_path, "human"))
-human_list_path = [os.path.join(example_path, "human", human) for human in human_list]
+#garm_list = os.listdir(os.path.join(example_path, "cloth"))
+#garm_list_path = [os.path.join(example_path, "cloth", garm) for garm in garm_list]
+#human_list = os.listdir(os.path.join(example_path, "human"))
+#human_list_path = [os.path.join(example_path, "human", human) for human in human_list]
 image_blocks = gr.Blocks(theme="Nymbo/Alyx_Theme").queue()
 with image_blocks as demo:
     gr.HTML("<center><h1>Virtual Try-On</h1></center>")
@@ -297,4 +297,4 @@ with image_blocks as demo:
             inputs=[imgs, garm_img, prompt, is_checked,
                     is_checked_crop, denoise_steps, seed],
             outputs=[image_out, masked_img], api_name='tryon')
-image_blocks.launch()
+image_blocks.launch()
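
The commit above hard-codes CPU and float32 into every from_pretrained call and every .to(...) cast. The same switch can be expressed once near the top of app.py by picking the device and dtype at import time. This is only a minimal sketch of that pattern, not part of the commit: base_path is a placeholder, and the remaining IDM-VTON imports and model loads are assumed to follow the same shape as the diff.

import torch
from diffusers import UNet2DConditionModel

# Fall back to CPU + float32 when no GPU is visible; keep CUDA + float16 otherwise.
device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.float16 if device == "cuda" else torch.float32

base_path = "yisol/IDM-VTON"  # placeholder repo id; substitute the Space's actual base_path

unet = UNet2DConditionModel.from_pretrained(
    base_path,
    subfolder="unet",
    torch_dtype=dtype,
).to(device)
unet.requires_grad_(False)

The same device/dtype pair could then be reused for the other from_pretrained calls and for the .to(device, dtype) casts inside start_tryon, instead of editing each call individually when moving between CPU and GPU hardware.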