Spaces: Runtime error
Update app.py
app.py CHANGED
@@ -40,11 +40,9 @@ class Args:
         self.seed = 42
         self.guidance_scale = 2.0
         self.mixed_precision = None
-
-# Determine the device to be used for computations (CUDA if available)
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-
+device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
+
 
 
 def pil_to_tensor(images):
     images = np.array(images).astype(np.float32) / 255.0
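Note: torch.device("cuda") and the string 'cuda:0' are interchangeable
wherever PyTorch accepts a device; the new form just pins the first GPU
explicitly. A minimal standalone sketch (nothing beyond torch assumed):

import torch

# A device string and a torch.device behave identically in .to() calls;
# 'cuda:0' selects the first GPU explicitly rather than the current default.
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
x = torch.zeros(1).to(device)
assert x.device == torch.device(device)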
@@ -56,44 +54,46 @@ def pil_to_tensor(images):
 args = Args()
 
 # Define the data type for model weights
-weight_dtype = torch.
+weight_dtype = torch.float16
 
 if args.seed is not None:
     set_seed(args.seed)
 
 
 # Load scheduler, tokenizer and models.
+
 noise_scheduler = DDPMScheduler.from_pretrained(args.pretrained_model_name_or_path, subfolder="scheduler")
 vae = AutoencoderKL.from_pretrained(
     args.pretrained_model_name_or_path,
     subfolder="vae",
-    torch_dtype=torch.
+    torch_dtype=torch.float16,
 )
 unet = UNet2DConditionModel.from_pretrained(
     args.pretrained_model_name_or_path,
     subfolder="unet",
-    torch_dtype=torch.
+    torch_dtype=torch.float16,
 )
 image_encoder = CLIPVisionModelWithProjection.from_pretrained(
     args.pretrained_model_name_or_path,
     subfolder="image_encoder",
-    torch_dtype=torch.
+    torch_dtype=torch.float16,
 )
 unet_encoder = UNet2DConditionModel_ref.from_pretrained(
     args.pretrained_model_name_or_path,
     subfolder="unet_encoder",
-    torch_dtype=torch.
+    torch_dtype=torch.float16,
 )
 text_encoder_one = CLIPTextModel.from_pretrained(
     args.pretrained_model_name_or_path,
     subfolder="text_encoder",
-    torch_dtype=torch.
+    torch_dtype=torch.float16,
 )
 text_encoder_two = CLIPTextModelWithProjection.from_pretrained(
     args.pretrained_model_name_or_path,
     subfolder="text_encoder_2",
-    torch_dtype=torch.
+    torch_dtype=torch.float16,
 )
+
 tokenizer_one = AutoTokenizer.from_pretrained(
     args.pretrained_model_name_or_path,
     subfolder="tokenizer",
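Note: the removed lines really did end at "torch." — dangling attribute
accesses that are syntax errors, which is presumably what put this Space
into the "Runtime error" state; the commit completes each one to
torch.float16. One remaining nit: weight_dtype is now defined but every
loader still hard-codes the dtype. A sketch of the variant that reuses it
(same diffusers call as in app.py; routing weight_dtype through the
loaders is a suggestion, not the author's code):

import torch
from diffusers import AutoencoderKL

# One shared dtype keeps every loader in sync if the precision is ever
# changed in a single place (e.g. to torch.bfloat16).
weight_dtype = torch.float16
vae = AutoencoderKL.from_pretrained(
    args.pretrained_model_name_or_path,  # args is defined earlier in app.py
    subfolder="vae",
    torch_dtype=weight_dtype,
)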
@@ -113,9 +113,8 @@ image_encoder.requires_grad_(False)
 unet_encoder.requires_grad_(False)
 text_encoder_one.requires_grad_(False)
 text_encoder_two.requires_grad_(False)
-unet_encoder.
-
-unet_encoder.eval()
+unet_encoder.requires_grad_(False)
+
 
 pipe = TryonPipeline.from_pretrained(
     args.pretrained_model_name_or_path,
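Note: the completed line repeats unet_encoder.requires_grad_(False) from
three lines above (harmless, but redundant), and unet_encoder.eval() is
dropped. For inference-only code the usual idiom applies both, since
eval() also switches off train-time behaviour such as dropout. A short
sketch over the models this file already loads:

# Freeze weights and switch to inference mode in one pass.
for model in (vae, unet, image_encoder, unet_encoder,
              text_encoder_one, text_encoder_two):
    model.requires_grad_(False)  # keep autograd from tracking the weights
    model.eval()                 # disable dropout and other training modes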
@@ -129,13 +128,11 @@ pipe = TryonPipeline.from_pretrained(
     scheduler = noise_scheduler,
     image_encoder=image_encoder,
     unet_encoder = unet_encoder,
-    torch_dtype=torch.
-)
-
-# pipe.enable_model_cpu_offload()
-# pipe.enable_vae_slicing()
-# Function to generate the image based on inputs
+    torch_dtype=torch.float16,
+)
+
 def generate_virtual_try_on(person_image, cloth_image, mask_image, pose_image,cloth_des):
+    pipe.to(device)
     # Prepare the input images as tensors
     person_image = person_image.resize((args.width, args.height))
     cloth_image = cloth_image.resize((args.width, args.height))
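Note: moving pipe.to(device) from module scope into the handler matches
the pattern ZeroGPU Spaces require, where a GPU is only attached while
the decorated handler runs; on a Space with a persistent GPU the repeated
call is cheap after the first request. A sketch of that pattern — the
spaces import and @spaces.GPU decorator are an assumption, not something
this diff shows:

import spaces  # hypothetical: only if this Space runs on ZeroGPU

@spaces.GPU
def generate_virtual_try_on(person_image, cloth_image, mask_image,
                            pose_image, cloth_des):
    pipe.to(device)  # the GPU only exists inside the decorated call
    ...              # rest of the handler unchanged from app.py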