ColorFlow

Runtime error

App Files Community

facehuggingjay committed 14 days ago

Commit

e972aca

verified ·

1 Parent(s): 78d1206

claude

Browse files

Files changed (1) hide show

app.py +15 -14

app.py CHANGED Viewed

@@ -99,13 +99,14 @@ transform = transforms.Compose([
     transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
 ])
 weight_dtype = torch.float16
 # line model
 line_model_path = model_global_path + '/LE/erika.pth'
 line_model = res_skip()
 line_model.load_state_dict(torch.load(line_model_path))
 line_model.eval()
-line_model.cuda()
 # screen model
 global opt
@@ -116,7 +117,7 @@ ScreenModel.setup(opt)
 ScreenModel.eval()
 image_processor = CLIPImageProcessor()
-image_encoder = CLIPVisionModelWithProjection.from_pretrained(model_global_path + '/image_encoder/').to('cuda')
 examples = [
@@ -218,8 +219,8 @@ def load_ckpt(input_style):
         ckpt_key_t = torch.load(ckpt_path + 'transformer_lora.bin', map_location='cpu')
         transformer.load_state_dict(ckpt_key_t, strict=False)
-        transformer.to('cuda', dtype=weight_dtype)
-        ColorGuider.to('cuda', dtype=weight_dtype)
         pipeline = ColorFlowPixArtAlphaPipeline.from_pretrained(
             pretrained_model_name_or_path,
@@ -230,12 +231,12 @@ def load_ckpt(input_style):
             variant=None,
             torch_dtype=weight_dtype,
         )
-        pipeline = pipeline.to("cuda")
         block_out_channels = [128, 128, 256, 512, 512]
         MultiResNetModel = MultiHiddenResNetModel(block_out_channels, len(block_out_channels))
         MultiResNetModel.load_state_dict(torch.load(ckpt_path + 'MultiResNetModel.bin', map_location='cpu'), strict=False)
-        MultiResNetModel.to('cuda', dtype=weight_dtype)
     elif input_style == "GrayImage(ScreenStyle)":
         ckpt_path = model_global_path + '/GraySD/'
@@ -245,8 +246,8 @@ def load_ckpt(input_style):
             pretrained_model_name_or_path, subfolder="unet", revision=None, variant=None
         )
         ColorGuider = ColorGuiderSDModel.from_pretrained(ckpt_path)
-        ColorGuider.to('cuda', dtype=weight_dtype)
-        unet.to('cuda', dtype=weight_dtype)
         pipeline = ColorFlowSDPipeline.from_pretrained(
             pretrained_model_name_or_path,
@@ -266,12 +267,12 @@ def load_ckpt(input_style):
         )
         pipeline.unet.add_adapter(unet_lora_config)
         pipeline.unet.load_state_dict(torch.load(ckpt_path + 'unet_lora.bin', map_location='cpu'), strict=False)
-        pipeline = pipeline.to("cuda")
         block_out_channels = [128, 128, 256, 512, 512]
         MultiResNetModel = MultiHiddenResNetModel(block_out_channels, len(block_out_channels))
         MultiResNetModel.load_state_dict(torch.load(ckpt_path + 'MultiResNetModel.bin', map_location='cpu'), strict=False)
-        MultiResNetModel.to('cuda', dtype=weight_dtype)
@@ -312,7 +313,7 @@ def extract_lines(image):
     patch = np.ones((1, 1, rows, cols), dtype="float32")
     patch[0, 0, 0:src.shape[0], 0:src.shape[1]] = src
-    tensor = torch.from_numpy(patch).cuda()
     with torch.no_grad():
         y = line_model(tensor)
@@ -440,7 +441,7 @@ def colorize_image(VAE_input, input_context, reference_images, resolution, seed,
                     idx_x = idx_list[k][1]
                     combined_image.paste(reference_patches_pil[ref_index].resize((tar_width//2-2, tar_height//2-2)), (tar_width//2 * idx_x + 1, tar_height//2 * idx_y + 1))
     gr.Info("Model inference in progress...")
-    generator = torch.Generator(device='cuda').manual_seed(seed)
     image = pipeline(
         "manga", cond_image=combined_image, cond_mask=validation_mask, num_inference_steps=num_inference_steps, generator=generator
     ).images[0]
@@ -455,8 +456,8 @@ def colorize_image(VAE_input, input_context, reference_images, resolution, seed,
         bottom = top + new_height
         center_crop = image.crop((left, top, right, bottom))
         up_img = center_crop.resize(query_image_vae.size)
-        test_low_color = transform(up_img).unsqueeze(0).to('cuda', dtype=weight_dtype)
-        query_image_vae = transform(query_image_vae).unsqueeze(0).to('cuda', dtype=weight_dtype)
         h_color, hidden_list_color = pipeline.vae._encode(test_low_color,return_dict = False, hidden_flag = True)
         h_bw, hidden_list_bw = pipeline.vae._encode(query_image_vae, return_dict = False, hidden_flag = True)

     transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
 ])
 weight_dtype = torch.float16
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 # line model
 line_model_path = model_global_path + '/LE/erika.pth'
 line_model = res_skip()
 line_model.load_state_dict(torch.load(line_model_path))
 line_model.eval()
+line_model.to(device)
 # screen model
 global opt
 ScreenModel.eval()
 image_processor = CLIPImageProcessor()
+image_encoder = CLIPVisionModelWithProjection.from_pretrained(model_global_path + '/image_encoder/').to(device)
 examples = [
         ckpt_key_t = torch.load(ckpt_path + 'transformer_lora.bin', map_location='cpu')
         transformer.load_state_dict(ckpt_key_t, strict=False)
+        transformer.to(device, dtype=weight_dtype)
+        ColorGuider.to(device, dtype=weight_dtype)
         pipeline = ColorFlowPixArtAlphaPipeline.from_pretrained(
             pretrained_model_name_or_path,
             variant=None,
             torch_dtype=weight_dtype,
         )
+        pipeline = pipeline.to(device)
         block_out_channels = [128, 128, 256, 512, 512]
         MultiResNetModel = MultiHiddenResNetModel(block_out_channels, len(block_out_channels))
         MultiResNetModel.load_state_dict(torch.load(ckpt_path + 'MultiResNetModel.bin', map_location='cpu'), strict=False)
+        MultiResNetModel.to(device, dtype=weight_dtype)
     elif input_style == "GrayImage(ScreenStyle)":
         ckpt_path = model_global_path + '/GraySD/'
             pretrained_model_name_or_path, subfolder="unet", revision=None, variant=None
         )
         ColorGuider = ColorGuiderSDModel.from_pretrained(ckpt_path)
+        ColorGuider.to(device, dtype=weight_dtype)
+        unet.to(device, dtype=weight_dtype)
         pipeline = ColorFlowSDPipeline.from_pretrained(
             pretrained_model_name_or_path,
         )
         pipeline.unet.add_adapter(unet_lora_config)
         pipeline.unet.load_state_dict(torch.load(ckpt_path + 'unet_lora.bin', map_location='cpu'), strict=False)
+        pipeline = pipeline.to(device)
         block_out_channels = [128, 128, 256, 512, 512]
         MultiResNetModel = MultiHiddenResNetModel(block_out_channels, len(block_out_channels))
         MultiResNetModel.load_state_dict(torch.load(ckpt_path + 'MultiResNetModel.bin', map_location='cpu'), strict=False)
+        MultiResNetModel.to(device, dtype=weight_dtype)
     patch = np.ones((1, 1, rows, cols), dtype="float32")
     patch[0, 0, 0:src.shape[0], 0:src.shape[1]] = src
+    tensor = torch.from_numpy(patch).to(device)
     with torch.no_grad():
         y = line_model(tensor)
                     idx_x = idx_list[k][1]
                     combined_image.paste(reference_patches_pil[ref_index].resize((tar_width//2-2, tar_height//2-2)), (tar_width//2 * idx_x + 1, tar_height//2 * idx_y + 1))
     gr.Info("Model inference in progress...")
+    generator = torch.Generator(device=device).manual_seed(seed)
     image = pipeline(
         "manga", cond_image=combined_image, cond_mask=validation_mask, num_inference_steps=num_inference_steps, generator=generator
     ).images[0]
         bottom = top + new_height
         center_crop = image.crop((left, top, right, bottom))
         up_img = center_crop.resize(query_image_vae.size)
+        test_low_color = transform(up_img).unsqueeze(0).to(device, dtype=weight_dtype)
+        query_image_vae = transform(query_image_vae).unsqueeze(0).to(device, dtype=weight_dtype)
         h_color, hidden_list_color = pipeline.vae._encode(test_low_color,return_dict = False, hidden_flag = True)
         h_bw, hidden_list_bw = pipeline.vae._encode(query_image_vae, return_dict = False, hidden_flag = True)